camel-ai 0.2.65__py3-none-any.whl → 0.2.83a6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (509) hide show
  1. camel/__init__.py +3 -3
  2. camel/agents/__init__.py +2 -2
  3. camel/agents/_types.py +9 -4
  4. camel/agents/_utils.py +40 -2
  5. camel/agents/base.py +2 -2
  6. camel/agents/chat_agent.py +5107 -995
  7. camel/agents/critic_agent.py +2 -2
  8. camel/agents/deductive_reasoner_agent.py +56 -56
  9. camel/agents/embodied_agent.py +2 -2
  10. camel/agents/knowledge_graph_agent.py +20 -20
  11. camel/agents/mcp_agent.py +35 -36
  12. camel/agents/multi_hop_generator_agent.py +3 -3
  13. camel/agents/programmed_agent_instruction.py +2 -2
  14. camel/agents/repo_agent.py +4 -3
  15. camel/agents/role_assignment_agent.py +2 -2
  16. camel/agents/search_agent.py +2 -2
  17. camel/agents/task_agent.py +2 -2
  18. camel/agents/tool_agents/__init__.py +2 -2
  19. camel/agents/tool_agents/base.py +2 -2
  20. camel/agents/tool_agents/hugging_face_tool_agent.py +3 -3
  21. camel/benchmarks/__init__.py +2 -2
  22. camel/benchmarks/apibank.py +5 -5
  23. camel/benchmarks/apibench.py +2 -2
  24. camel/benchmarks/base.py +2 -2
  25. camel/benchmarks/browsecomp.py +44 -33
  26. camel/benchmarks/gaia.py +17 -13
  27. camel/benchmarks/mock_website/README.md +1 -3
  28. camel/benchmarks/mock_website/mock_web.py +2 -2
  29. camel/benchmarks/mock_website/requirements.txt +1 -1
  30. camel/benchmarks/mock_website/shopping_mall/app.py +2 -2
  31. camel/benchmarks/mock_website/task.json +1 -1
  32. camel/benchmarks/nexus.py +3 -3
  33. camel/benchmarks/ragbench.py +2 -2
  34. camel/bots/__init__.py +2 -2
  35. camel/bots/discord/__init__.py +2 -2
  36. camel/bots/discord/discord_app.py +2 -2
  37. camel/bots/discord/discord_installation.py +2 -2
  38. camel/bots/discord/discord_store.py +3 -3
  39. camel/bots/slack/__init__.py +2 -2
  40. camel/bots/slack/models.py +4 -4
  41. camel/bots/slack/slack_app.py +2 -2
  42. camel/bots/telegram_bot.py +2 -2
  43. camel/configs/__init__.py +29 -2
  44. camel/configs/aihubmix_config.py +90 -0
  45. camel/configs/aiml_config.py +2 -2
  46. camel/configs/amd_config.py +70 -0
  47. camel/configs/anthropic_config.py +2 -2
  48. camel/configs/base_config.py +2 -2
  49. camel/configs/bedrock_config.py +5 -3
  50. camel/configs/cerebras_config.py +98 -0
  51. camel/configs/cohere_config.py +2 -2
  52. camel/configs/cometapi_config.py +106 -0
  53. camel/configs/crynux_config.py +2 -2
  54. camel/configs/deepseek_config.py +9 -8
  55. camel/configs/function_gemma_config.py +59 -0
  56. camel/configs/gemini_config.py +6 -4
  57. camel/configs/groq_config.py +6 -4
  58. camel/configs/internlm_config.py +6 -4
  59. camel/configs/litellm_config.py +2 -2
  60. camel/configs/lmstudio_config.py +6 -4
  61. camel/configs/minimax_config.py +95 -0
  62. camel/configs/mistral_config.py +2 -2
  63. camel/configs/modelscope_config.py +5 -3
  64. camel/configs/moonshot_config.py +2 -2
  65. camel/configs/nebius_config.py +105 -0
  66. camel/configs/netmind_config.py +2 -2
  67. camel/configs/novita_config.py +2 -2
  68. camel/configs/nvidia_config.py +2 -2
  69. camel/configs/ollama_config.py +2 -2
  70. camel/configs/openai_config.py +5 -3
  71. camel/configs/openrouter_config.py +6 -4
  72. camel/configs/ppio_config.py +2 -2
  73. camel/configs/qianfan_config.py +85 -0
  74. camel/configs/qwen_config.py +2 -2
  75. camel/configs/reka_config.py +2 -2
  76. camel/configs/samba_config.py +6 -4
  77. camel/configs/sglang_config.py +2 -2
  78. camel/configs/siliconflow_config.py +2 -2
  79. camel/configs/togetherai_config.py +2 -2
  80. camel/configs/vllm_config.py +4 -2
  81. camel/configs/watsonx_config.py +2 -2
  82. camel/configs/yi_config.py +6 -4
  83. camel/configs/zhipuai_config.py +6 -4
  84. camel/data_collectors/__init__.py +2 -2
  85. camel/data_collectors/alpaca_collector.py +18 -9
  86. camel/data_collectors/base.py +2 -2
  87. camel/data_collectors/sharegpt_collector.py +2 -2
  88. camel/datagen/__init__.py +2 -2
  89. camel/datagen/cot_datagen.py +3 -3
  90. camel/datagen/evol_instruct/__init__.py +2 -2
  91. camel/datagen/evol_instruct/evol_instruct.py +2 -2
  92. camel/datagen/evol_instruct/scorer.py +12 -12
  93. camel/datagen/evol_instruct/templates.py +16 -16
  94. camel/datagen/self_improving_cot.py +5 -5
  95. camel/datagen/self_instruct/__init__.py +2 -2
  96. camel/datagen/self_instruct/filter/__init__.py +2 -2
  97. camel/datagen/self_instruct/filter/filter_function.py +2 -2
  98. camel/datagen/self_instruct/filter/filter_registry.py +2 -2
  99. camel/datagen/self_instruct/filter/instruction_filter.py +2 -2
  100. camel/datagen/self_instruct/self_instruct.py +2 -2
  101. camel/datagen/self_instruct/templates.py +47 -47
  102. camel/datagen/source2synth/__init__.py +2 -2
  103. camel/datagen/source2synth/data_processor.py +2 -2
  104. camel/datagen/source2synth/models.py +2 -2
  105. camel/datagen/source2synth/user_data_processor_config.py +2 -2
  106. camel/datahubs/__init__.py +2 -2
  107. camel/datahubs/base.py +2 -2
  108. camel/datahubs/huggingface.py +2 -2
  109. camel/datahubs/models.py +2 -2
  110. camel/datasets/__init__.py +2 -2
  111. camel/datasets/base_generator.py +41 -12
  112. camel/datasets/few_shot_generator.py +18 -18
  113. camel/datasets/models.py +2 -2
  114. camel/datasets/self_instruct_generator.py +2 -2
  115. camel/datasets/static_dataset.py +2 -2
  116. camel/embeddings/__init__.py +2 -2
  117. camel/embeddings/azure_embedding.py +2 -2
  118. camel/embeddings/base.py +2 -2
  119. camel/embeddings/gemini_embedding.py +2 -2
  120. camel/embeddings/jina_embedding.py +2 -2
  121. camel/embeddings/mistral_embedding.py +2 -2
  122. camel/embeddings/openai_compatible_embedding.py +2 -2
  123. camel/embeddings/openai_embedding.py +2 -2
  124. camel/embeddings/sentence_transformers_embeddings.py +2 -2
  125. camel/embeddings/together_embedding.py +2 -2
  126. camel/embeddings/vlm_embedding.py +2 -2
  127. camel/environments/__init__.py +14 -2
  128. camel/environments/models.py +2 -2
  129. camel/environments/multi_step.py +2 -2
  130. camel/environments/rlcards_env.py +860 -0
  131. camel/environments/single_step.py +30 -5
  132. camel/environments/tic_tac_toe.py +3 -3
  133. camel/extractors/__init__.py +2 -2
  134. camel/extractors/base.py +2 -2
  135. camel/extractors/python_strategies.py +2 -2
  136. camel/generators.py +2 -2
  137. camel/human.py +2 -2
  138. camel/interpreters/__init__.py +4 -2
  139. camel/interpreters/base.py +2 -2
  140. camel/interpreters/docker/Dockerfile +14 -24
  141. camel/interpreters/docker_interpreter.py +5 -4
  142. camel/interpreters/e2b_interpreter.py +36 -3
  143. camel/interpreters/internal_python_interpreter.py +53 -4
  144. camel/interpreters/interpreter_error.py +2 -2
  145. camel/interpreters/ipython_interpreter.py +2 -2
  146. camel/interpreters/microsandbox_interpreter.py +395 -0
  147. camel/interpreters/subprocess_interpreter.py +2 -2
  148. camel/loaders/__init__.py +13 -4
  149. camel/loaders/apify_reader.py +2 -2
  150. camel/loaders/base_io.py +2 -2
  151. camel/loaders/base_loader.py +85 -0
  152. camel/loaders/chunkr_reader.py +11 -2
  153. camel/loaders/crawl4ai_reader.py +2 -2
  154. camel/loaders/firecrawl_reader.py +6 -6
  155. camel/loaders/jina_url_reader.py +2 -2
  156. camel/loaders/markitdown.py +2 -2
  157. camel/loaders/mineru_extractor.py +2 -2
  158. camel/loaders/mistral_reader.py +2 -2
  159. camel/loaders/scrapegraph_reader.py +2 -2
  160. camel/loaders/unstructured_io.py +2 -2
  161. camel/logger.py +5 -5
  162. camel/memories/__init__.py +2 -2
  163. camel/memories/agent_memories.py +86 -3
  164. camel/memories/base.py +36 -2
  165. camel/memories/blocks/__init__.py +2 -2
  166. camel/memories/blocks/chat_history_block.py +125 -7
  167. camel/memories/blocks/vectordb_block.py +10 -3
  168. camel/memories/context_creators/__init__.py +2 -2
  169. camel/memories/context_creators/score_based.py +109 -230
  170. camel/memories/records.py +90 -10
  171. camel/messages/__init__.py +2 -2
  172. camel/messages/base.py +178 -43
  173. camel/messages/conversion/__init__.py +2 -2
  174. camel/messages/conversion/alpaca.py +2 -2
  175. camel/messages/conversion/conversation_models.py +2 -2
  176. camel/messages/conversion/sharegpt/__init__.py +2 -2
  177. camel/messages/conversion/sharegpt/function_call_formatter.py +2 -2
  178. camel/messages/conversion/sharegpt/hermes/__init__.py +2 -2
  179. camel/messages/conversion/sharegpt/hermes/hermes_function_formatter.py +2 -2
  180. camel/messages/func_message.py +54 -17
  181. camel/models/__init__.py +18 -2
  182. camel/models/_utils.py +3 -3
  183. camel/models/aihubmix_model.py +83 -0
  184. camel/models/aiml_model.py +11 -18
  185. camel/models/amd_model.py +101 -0
  186. camel/models/anthropic_model.py +127 -20
  187. camel/models/aws_bedrock_model.py +12 -35
  188. camel/models/azure_openai_model.py +214 -115
  189. camel/models/base_audio_model.py +5 -3
  190. camel/models/base_model.py +378 -31
  191. camel/models/cerebras_model.py +83 -0
  192. camel/models/cohere_model.py +18 -49
  193. camel/models/cometapi_model.py +83 -0
  194. camel/models/crynux_model.py +11 -18
  195. camel/models/deepseek_model.py +20 -84
  196. camel/models/fish_audio_model.py +8 -2
  197. camel/models/function_gemma_model.py +889 -0
  198. camel/models/gemini_model.py +391 -52
  199. camel/models/groq_model.py +11 -19
  200. camel/models/internlm_model.py +11 -18
  201. camel/models/litellm_model.py +57 -49
  202. camel/models/lmstudio_model.py +17 -20
  203. camel/models/minimax_model.py +83 -0
  204. camel/models/mistral_model.py +20 -47
  205. camel/models/model_factory.py +39 -3
  206. camel/models/model_manager.py +26 -8
  207. camel/models/modelscope_model.py +13 -193
  208. camel/models/moonshot_model.py +183 -21
  209. camel/models/nebius_model.py +83 -0
  210. camel/models/nemotron_model.py +19 -9
  211. camel/models/netmind_model.py +11 -18
  212. camel/models/novita_model.py +11 -18
  213. camel/models/nvidia_model.py +11 -18
  214. camel/models/ollama_model.py +14 -21
  215. camel/models/openai_audio_models.py +2 -2
  216. camel/models/openai_compatible_model.py +190 -71
  217. camel/models/openai_model.py +192 -86
  218. camel/models/openrouter_model.py +11 -19
  219. camel/models/ppio_model.py +11 -18
  220. camel/models/qianfan_model.py +89 -0
  221. camel/models/qwen_model.py +13 -193
  222. camel/models/reka_model.py +23 -49
  223. camel/models/reward/__init__.py +2 -2
  224. camel/models/reward/base_reward_model.py +2 -2
  225. camel/models/reward/evaluator.py +2 -2
  226. camel/models/reward/nemotron_model.py +2 -2
  227. camel/models/reward/skywork_model.py +2 -2
  228. camel/models/samba_model.py +50 -75
  229. camel/models/sglang_model.py +90 -68
  230. camel/models/siliconflow_model.py +12 -35
  231. camel/models/stub_model.py +10 -7
  232. camel/models/togetherai_model.py +11 -18
  233. camel/models/vllm_model.py +10 -18
  234. camel/models/volcano_model.py +158 -19
  235. camel/models/watsonx_model.py +9 -47
  236. camel/models/yi_model.py +11 -18
  237. camel/models/zhipuai_model.py +70 -18
  238. camel/parsers/__init__.py +18 -0
  239. camel/parsers/mcp_tool_call_parser.py +176 -0
  240. camel/personas/__init__.py +2 -2
  241. camel/personas/persona.py +2 -2
  242. camel/personas/persona_hub.py +2 -2
  243. camel/prompts/__init__.py +2 -2
  244. camel/prompts/ai_society.py +2 -2
  245. camel/prompts/base.py +2 -2
  246. camel/prompts/code.py +2 -2
  247. camel/prompts/evaluation.py +2 -2
  248. camel/prompts/generate_text_embedding_data.py +2 -2
  249. camel/prompts/image_craft.py +2 -2
  250. camel/prompts/misalignment.py +2 -2
  251. camel/prompts/multi_condition_image_craft.py +2 -2
  252. camel/prompts/object_recognition.py +2 -2
  253. camel/prompts/persona_hub.py +3 -3
  254. camel/prompts/prompt_templates.py +2 -2
  255. camel/prompts/role_description_prompt_template.py +2 -2
  256. camel/prompts/solution_extraction.py +8 -8
  257. camel/prompts/task_prompt_template.py +2 -2
  258. camel/prompts/translation.py +2 -2
  259. camel/prompts/video_description_prompt.py +3 -3
  260. camel/responses/__init__.py +2 -2
  261. camel/responses/agent_responses.py +2 -2
  262. camel/retrievers/__init__.py +2 -2
  263. camel/retrievers/auto_retriever.py +3 -2
  264. camel/retrievers/base.py +2 -2
  265. camel/retrievers/bm25_retriever.py +2 -2
  266. camel/retrievers/cohere_rerank_retriever.py +2 -2
  267. camel/retrievers/hybrid_retrival.py +2 -2
  268. camel/retrievers/vector_retriever.py +2 -2
  269. camel/runtimes/Dockerfile.multi-toolkit +90 -0
  270. camel/runtimes/__init__.py +2 -2
  271. camel/runtimes/api.py +79 -23
  272. camel/runtimes/base.py +2 -2
  273. camel/runtimes/configs.py +13 -13
  274. camel/runtimes/daytona_runtime.py +17 -18
  275. camel/runtimes/docker_runtime.py +12 -12
  276. camel/runtimes/llm_guard_runtime.py +26 -26
  277. camel/runtimes/remote_http_runtime.py +11 -11
  278. camel/runtimes/ubuntu_docker_runtime.py +2 -2
  279. camel/runtimes/utils/__init__.py +2 -2
  280. camel/runtimes/utils/function_risk_toolkit.py +2 -2
  281. camel/runtimes/utils/ignore_risk_toolkit.py +2 -2
  282. camel/schemas/__init__.py +2 -2
  283. camel/schemas/base.py +2 -2
  284. camel/schemas/openai_converter.py +3 -3
  285. camel/schemas/outlines_converter.py +2 -2
  286. camel/services/agent_openapi_server.py +380 -0
  287. camel/societies/__init__.py +4 -2
  288. camel/societies/babyagi_playing.py +2 -2
  289. camel/societies/role_playing.py +201 -80
  290. camel/societies/workforce/__init__.py +10 -3
  291. camel/societies/workforce/base.py +2 -2
  292. camel/societies/workforce/events.py +145 -0
  293. camel/societies/workforce/prompts.py +259 -33
  294. camel/societies/workforce/role_playing_worker.py +88 -31
  295. camel/societies/workforce/single_agent_worker.py +638 -40
  296. camel/societies/workforce/structured_output_handler.py +512 -0
  297. camel/societies/workforce/task_channel.py +182 -38
  298. camel/societies/workforce/utils.py +780 -65
  299. camel/societies/workforce/worker.py +92 -26
  300. camel/societies/workforce/workflow_memory_manager.py +1746 -0
  301. camel/societies/workforce/workforce.py +5354 -372
  302. camel/societies/workforce/workforce_callback.py +103 -0
  303. camel/societies/workforce/workforce_logger.py +647 -0
  304. camel/societies/workforce/workforce_metrics.py +33 -0
  305. camel/storages/__init__.py +6 -2
  306. camel/storages/graph_storages/__init__.py +2 -2
  307. camel/storages/graph_storages/base.py +2 -2
  308. camel/storages/graph_storages/graph_element.py +2 -2
  309. camel/storages/graph_storages/nebula_graph.py +4 -4
  310. camel/storages/graph_storages/neo4j_graph.py +7 -7
  311. camel/storages/key_value_storages/__init__.py +2 -2
  312. camel/storages/key_value_storages/base.py +2 -2
  313. camel/storages/key_value_storages/in_memory.py +2 -2
  314. camel/storages/key_value_storages/json.py +17 -4
  315. camel/storages/key_value_storages/mem0_cloud.py +50 -49
  316. camel/storages/key_value_storages/redis.py +2 -2
  317. camel/storages/object_storages/__init__.py +2 -2
  318. camel/storages/object_storages/amazon_s3.py +2 -2
  319. camel/storages/object_storages/azure_blob.py +2 -2
  320. camel/storages/object_storages/base.py +2 -2
  321. camel/storages/object_storages/google_cloud.py +3 -3
  322. camel/storages/vectordb_storages/__init__.py +8 -2
  323. camel/storages/vectordb_storages/base.py +2 -2
  324. camel/storages/vectordb_storages/chroma.py +731 -0
  325. camel/storages/vectordb_storages/faiss.py +2 -2
  326. camel/storages/vectordb_storages/milvus.py +2 -2
  327. camel/storages/vectordb_storages/oceanbase.py +15 -15
  328. camel/storages/vectordb_storages/pgvector.py +349 -0
  329. camel/storages/vectordb_storages/qdrant.py +6 -6
  330. camel/storages/vectordb_storages/surreal.py +372 -0
  331. camel/storages/vectordb_storages/tidb.py +11 -8
  332. camel/storages/vectordb_storages/weaviate.py +2 -2
  333. camel/tasks/__init__.py +2 -2
  334. camel/tasks/task.py +348 -26
  335. camel/tasks/task_prompt.py +3 -3
  336. camel/terminators/__init__.py +2 -2
  337. camel/terminators/base.py +2 -2
  338. camel/terminators/response_terminator.py +2 -2
  339. camel/terminators/token_limit_terminator.py +2 -2
  340. camel/toolkits/__init__.py +57 -10
  341. camel/toolkits/aci_toolkit.py +66 -21
  342. camel/toolkits/arxiv_toolkit.py +8 -8
  343. camel/toolkits/ask_news_toolkit.py +2 -2
  344. camel/toolkits/async_browser_toolkit.py +4 -4
  345. camel/toolkits/audio_analysis_toolkit.py +3 -3
  346. camel/toolkits/base.py +106 -6
  347. camel/toolkits/bohrium_toolkit.py +2 -2
  348. camel/toolkits/browser_toolkit.py +34 -21
  349. camel/toolkits/browser_toolkit_commons.py +4 -4
  350. camel/toolkits/code_execution.py +31 -4
  351. camel/toolkits/context_summarizer_toolkit.py +684 -0
  352. camel/toolkits/craw4ai_toolkit.py +93 -0
  353. camel/toolkits/dappier_toolkit.py +12 -8
  354. camel/toolkits/data_commons_toolkit.py +2 -2
  355. camel/toolkits/dingtalk.py +1135 -0
  356. camel/toolkits/earth_science_toolkit.py +5367 -0
  357. camel/toolkits/edgeone_pages_mcp_toolkit.py +49 -0
  358. camel/toolkits/excel_toolkit.py +905 -71
  359. camel/toolkits/file_toolkit.py +1402 -0
  360. camel/toolkits/function_tool.py +205 -27
  361. camel/toolkits/github_toolkit.py +109 -22
  362. camel/toolkits/gmail_toolkit.py +1839 -0
  363. camel/toolkits/google_calendar_toolkit.py +40 -6
  364. camel/toolkits/google_drive_mcp_toolkit.py +54 -0
  365. camel/toolkits/google_maps_toolkit.py +2 -2
  366. camel/toolkits/google_scholar_toolkit.py +2 -2
  367. camel/toolkits/human_toolkit.py +36 -12
  368. camel/toolkits/hybrid_browser_toolkit/__init__.py +18 -0
  369. camel/toolkits/hybrid_browser_toolkit/config_loader.py +185 -0
  370. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +246 -0
  371. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +1958 -0
  372. camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
  373. camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +4589 -0
  374. camel/toolkits/hybrid_browser_toolkit/ts/package.json +33 -0
  375. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-scripts.js +125 -0
  376. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +1940 -0
  377. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +233 -0
  378. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +589 -0
  379. camel/toolkits/hybrid_browser_toolkit/ts/src/index.ts +7 -0
  380. camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
  381. camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
  382. camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
  383. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +129 -0
  384. camel/toolkits/hybrid_browser_toolkit/ts/tsconfig.json +27 -0
  385. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +325 -0
  386. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +1037 -0
  387. camel/toolkits/hybrid_browser_toolkit_py/__init__.py +17 -0
  388. camel/toolkits/hybrid_browser_toolkit_py/actions.py +575 -0
  389. camel/toolkits/hybrid_browser_toolkit_py/agent.py +311 -0
  390. camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +787 -0
  391. camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +490 -0
  392. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +2390 -0
  393. camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +233 -0
  394. camel/toolkits/hybrid_browser_toolkit_py/stealth_script.js +0 -0
  395. camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +1043 -0
  396. camel/toolkits/image_analysis_toolkit.py +3 -6
  397. camel/toolkits/image_generation_toolkit.py +390 -0
  398. camel/toolkits/jina_reranker_toolkit.py +5 -6
  399. camel/toolkits/klavis_toolkit.py +7 -3
  400. camel/toolkits/linkedin_toolkit.py +2 -2
  401. camel/toolkits/markitdown_toolkit.py +104 -0
  402. camel/toolkits/math_toolkit.py +66 -12
  403. camel/toolkits/mcp_toolkit.py +412 -36
  404. camel/toolkits/memory_toolkit.py +7 -3
  405. camel/toolkits/meshy_toolkit.py +2 -2
  406. camel/toolkits/message_agent_toolkit.py +608 -0
  407. camel/toolkits/message_integration.py +728 -0
  408. camel/toolkits/microsoft_outlook_mail_toolkit.py +1885 -0
  409. camel/toolkits/mineru_toolkit.py +2 -2
  410. camel/toolkits/minimax_mcp_toolkit.py +195 -0
  411. camel/toolkits/networkx_toolkit.py +2 -2
  412. camel/toolkits/note_taking_toolkit.py +277 -0
  413. camel/toolkits/notion_mcp_toolkit.py +224 -0
  414. camel/toolkits/notion_toolkit.py +2 -2
  415. camel/toolkits/open_api_specs/biztoc/__init__.py +2 -2
  416. camel/toolkits/open_api_specs/biztoc/ai-plugin.json +1 -1
  417. camel/toolkits/open_api_specs/coursera/__init__.py +2 -2
  418. camel/toolkits/open_api_specs/create_qr_code/__init__.py +2 -2
  419. camel/toolkits/open_api_specs/klarna/__init__.py +2 -2
  420. camel/toolkits/open_api_specs/nasa_apod/__init__.py +2 -2
  421. camel/toolkits/open_api_specs/outschool/__init__.py +2 -2
  422. camel/toolkits/open_api_specs/outschool/ai-plugin.json +1 -1
  423. camel/toolkits/open_api_specs/outschool/openapi.yaml +1 -1
  424. camel/toolkits/open_api_specs/outschool/paths/__init__.py +2 -2
  425. camel/toolkits/open_api_specs/outschool/paths/get_classes.py +2 -2
  426. camel/toolkits/open_api_specs/outschool/paths/search_teachers.py +2 -2
  427. camel/toolkits/open_api_specs/security_config.py +2 -2
  428. camel/toolkits/open_api_specs/speak/__init__.py +2 -2
  429. camel/toolkits/open_api_specs/web_scraper/__init__.py +2 -2
  430. camel/toolkits/open_api_specs/web_scraper/ai-plugin.json +1 -1
  431. camel/toolkits/open_api_specs/web_scraper/paths/__init__.py +2 -2
  432. camel/toolkits/open_api_specs/web_scraper/paths/scraper.py +2 -2
  433. camel/toolkits/open_api_toolkit.py +2 -2
  434. camel/toolkits/openbb_toolkit.py +7 -3
  435. camel/toolkits/origene_mcp_toolkit.py +56 -0
  436. camel/toolkits/page_script.js +53 -53
  437. camel/toolkits/playwright_mcp_toolkit.py +13 -31
  438. camel/toolkits/pptx_toolkit.py +36 -23
  439. camel/toolkits/pubmed_toolkit.py +2 -2
  440. camel/toolkits/pulse_mcp_search_toolkit.py +2 -2
  441. camel/toolkits/pyautogui_toolkit.py +2 -2
  442. camel/toolkits/reddit_toolkit.py +2 -2
  443. camel/toolkits/resend_toolkit.py +168 -0
  444. camel/toolkits/retrieval_toolkit.py +2 -2
  445. camel/toolkits/screenshot_toolkit.py +213 -0
  446. camel/toolkits/search_toolkit.py +606 -156
  447. camel/toolkits/searxng_toolkit.py +2 -2
  448. camel/toolkits/semantic_scholar_toolkit.py +2 -2
  449. camel/toolkits/slack_toolkit.py +108 -58
  450. camel/toolkits/sql_toolkit.py +712 -0
  451. camel/toolkits/stripe_toolkit.py +2 -2
  452. camel/toolkits/sympy_toolkit.py +3 -3
  453. camel/toolkits/task_planning_toolkit.py +5 -5
  454. camel/toolkits/terminal_toolkit/__init__.py +18 -0
  455. camel/toolkits/terminal_toolkit/terminal_toolkit.py +1281 -0
  456. camel/toolkits/terminal_toolkit/utils.py +659 -0
  457. camel/toolkits/thinking_toolkit.py +3 -3
  458. camel/toolkits/twitter_toolkit.py +2 -2
  459. camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
  460. camel/toolkits/video_analysis_toolkit.py +109 -29
  461. camel/toolkits/video_download_toolkit.py +19 -16
  462. camel/toolkits/weather_toolkit.py +2 -2
  463. camel/toolkits/web_deploy_toolkit.py +1219 -0
  464. camel/toolkits/wechat_official_toolkit.py +483 -0
  465. camel/toolkits/whatsapp_toolkit.py +2 -2
  466. camel/toolkits/wolfram_alpha_toolkit.py +2 -2
  467. camel/toolkits/zapier_toolkit.py +7 -3
  468. camel/types/__init__.py +4 -4
  469. camel/types/agents/__init__.py +2 -2
  470. camel/types/agents/tool_calling_record.py +6 -3
  471. camel/types/enums.py +381 -41
  472. camel/types/mcp_registries.py +2 -2
  473. camel/types/openai_types.py +4 -4
  474. camel/types/unified_model_type.py +46 -10
  475. camel/utils/__init__.py +5 -2
  476. camel/utils/agent_context.py +41 -0
  477. camel/utils/async_func.py +2 -2
  478. camel/utils/chunker/__init__.py +2 -2
  479. camel/utils/chunker/base.py +2 -2
  480. camel/utils/chunker/code_chunker.py +2 -2
  481. camel/utils/chunker/uio_chunker.py +2 -2
  482. camel/utils/commons.py +38 -7
  483. camel/utils/constants.py +5 -2
  484. camel/utils/context_utils.py +1134 -0
  485. camel/utils/deduplication.py +2 -2
  486. camel/utils/filename.py +2 -2
  487. camel/utils/langfuse.py +18 -10
  488. camel/utils/mcp.py +140 -6
  489. camel/utils/mcp_client.py +48 -38
  490. camel/utils/message_summarizer.py +148 -0
  491. camel/utils/response_format.py +2 -2
  492. camel/utils/token_counting.py +45 -22
  493. camel/utils/tool_result.py +44 -0
  494. camel/verifiers/__init__.py +2 -2
  495. camel/verifiers/base.py +2 -2
  496. camel/verifiers/math_verifier.py +2 -2
  497. camel/verifiers/models.py +2 -2
  498. camel/verifiers/physics_verifier.py +2 -2
  499. camel/verifiers/python_verifier.py +2 -2
  500. {camel_ai-0.2.65.dist-info → camel_ai-0.2.83a6.dist-info}/METADATA +355 -117
  501. camel_ai-0.2.83a6.dist-info/RECORD +511 -0
  502. {camel_ai-0.2.65.dist-info → camel_ai-0.2.83a6.dist-info}/WHEEL +1 -1
  503. {camel_ai-0.2.65.dist-info → camel_ai-0.2.83a6.dist-info}/licenses/LICENSE +1 -1
  504. camel/loaders/pandas_reader.py +0 -368
  505. camel/toolkits/dalle_toolkit.py +0 -175
  506. camel/toolkits/file_write_toolkit.py +0 -444
  507. camel/toolkits/openai_agent_toolkit.py +0 -135
  508. camel/toolkits/terminal_toolkit.py +0 -1037
  509. camel_ai-0.2.65.dist-info/RECORD +0 -426
@@ -0,0 +1,1940 @@
1
+ import { Page, Browser, BrowserContext, chromium, ConsoleMessage, Frame } from 'playwright';
2
+ import { BrowserToolkitConfig, SnapshotResult, SnapshotElement, ActionResult, TabInfo, BrowserAction, DetailedTiming } from './types';
3
+ import { ConfigLoader, StealthConfig } from './config-loader';
4
+
5
+ export class HybridBrowserSession {
6
+ private browser: Browser | null = null;
7
+ private context: BrowserContext | null = null;
8
+ private contextOwnedByUs: boolean = false;
9
+ private pages: Map<string, Page> = new Map();
10
+ private consoleLogs: Map<string, ConsoleMessage[]> = new Map();
11
+ private currentTabId: string | null = null;
12
+ private tabCounter = 0;
13
+ private configLoader: ConfigLoader;
14
+ private scrollPosition: { x: number; y: number } = {x: 0, y: 0};
15
+ private hasNavigatedBefore = false; // Track if we've navigated before
16
+ private logLimit: number;
17
+
18
/**
 * Create a session from a toolkit configuration.
 *
 * @param config - Toolkit options handed to `ConfigLoader.fromPythonConfig`;
 *   defaults to an empty object when omitted.
 */
constructor(config: BrowserToolkitConfig = {}) {
  // Let ConfigLoader.fromPythonConfig handle conversion of the incoming config.
  const loader = ConfigLoader.fromPythonConfig(config);
  this.configLoader = loader;

  // Console log cap per tab. Any falsy configured value (unset, 0, ...)
  // falls back to 1000 entries.
  const configuredLimit = loader.getBrowserConfig().consoleLogLimit;
  this.logLimit = configuredLimit ? configuredLimit : 1000;
}
24
+
25
/**
 * Compatibility wrapper around the page's `_snapshotForAI()` API.
 *
 * The call has two known return shapes:
 *   - a plain string (v1.56.x and earlier), returned unchanged;
 *   - an object (v1.57.0+), whose `.full` property is returned.
 *
 * @param page - Page to snapshot; `_snapshotForAI` is reached through an
 *   `any` cast because it is not part of the typed `Page` surface used here.
 * @returns The snapshot text.
 */
private async getSnapshot(page: Page): Promise<string> {
  const snapshot = await (page as any)._snapshotForAI();

  // Older versions hand back the snapshot text directly.
  if (typeof snapshot === 'string') {
    return snapshot;
  }

  // Newer versions wrap it in an object; the text lives under `.full`.
  return snapshot.full;
}
35
+
36
/**
 * Track a page under `tabId` and start collecting its console output.
 *
 * Stores the page in `pages`, creates an empty log buffer in `consoleLogs`,
 * and attaches `console` and `close` listeners to the page.
 *
 * @param tabId - Key used for both the page map and the log map.
 * @param page - Page to register.
 */
private registerNewPage(tabId: string, page: Page): void {
  this.pages.set(tabId, page);
  this.consoleLogs.set(tabId, []);

  // Append each console message to this tab's buffer, discarding the oldest
  // entry once the buffer grows past `logLimit`.
  const recordConsoleMessage = (entry: ConsoleMessage): void => {
    const buffer = this.consoleLogs.get(tabId);
    if (!buffer) {
      // Buffer already removed (e.g. by the close handler); nothing to record.
      return;
    }
    buffer.push(entry);
    if (buffer.length > this.logLimit) {
      buffer.shift();
    }
  };

  // On close, only the log buffer is dropped; the `pages` entry is not
  // touched by this handler.
  const dropConsoleBuffer = (): void => {
    this.consoleLogs.delete(tabId);
  };

  page.on('console', recordConsoleMessage);
  page.on('close', dropConsoleBuffer);
}
56
+
57
+ async ensureBrowser(): Promise<void> {
58
+ if (this.browser) {
59
+ return;
60
+ }
61
+
62
+ const browserConfig = this.configLoader.getBrowserConfig();
63
+ const stealthConfig = this.configLoader.getStealthConfig();
64
+
65
+ // Check if CDP URL is provided
66
+ if (browserConfig.cdpUrl) {
67
+ // Connect to existing browser via CDP
68
+ this.browser = await chromium.connectOverCDP(browserConfig.cdpUrl);
69
+
70
+ // Get existing contexts or create new one
71
+ const contexts = this.browser.contexts();
72
+ if (contexts.length > 0) {
73
+ this.context = contexts[0];
74
+ this.contextOwnedByUs = false;
75
+
76
+ // Apply stealth headers to existing context if configured
77
+ // Note: userAgent cannot be changed on an existing context
78
+ if (stealthConfig.enabled) {
79
+ if (stealthConfig.extraHTTPHeaders) {
80
+ await this.context.setExtraHTTPHeaders(stealthConfig.extraHTTPHeaders);
81
+ }
82
+ if (stealthConfig.userAgent) {
83
+ console.warn('[HybridBrowserSession] Cannot apply userAgent to existing context. Consider creating a new context if userAgent customization is required.');
84
+ }
85
+ }
86
+ } else {
87
+ const contextOptions: any = {
88
+ viewport: browserConfig.viewport
89
+ };
90
+
91
+ // Apply stealth headers and UA if configured
92
+ if (stealthConfig.enabled) {
93
+ if (stealthConfig.extraHTTPHeaders) {
94
+ contextOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
95
+ }
96
+ if (stealthConfig.userAgent) {
97
+ contextOptions.userAgent = stealthConfig.userAgent;
98
+ }
99
+ }
100
+
101
+ this.context = await this.browser.newContext(contextOptions);
102
+ this.contextOwnedByUs = true;
103
+ this.browser = this.context.browser();
104
+ }
105
+
106
+ const pages = this.context.pages();
107
+ console.log(`[CDP] cdpKeepCurrentPage: ${browserConfig.cdpKeepCurrentPage}, pages count: ${pages.length}`);
108
+ if (browserConfig.cdpKeepCurrentPage) {
109
+ // Use existing page without creating new ones
110
+ if (pages.length > 0) {
111
+ // Find first non-closed page
112
+ let validPage: Page | null = null;
113
+ for (const page of pages) {
114
+ if (!page.isClosed()) {
115
+ validPage = page;
116
+ break;
117
+ }
118
+ }
119
+
120
+ if (validPage) {
121
+ const tabId = this.generateTabId();
122
+ this.registerNewPage(tabId, validPage);
123
+ this.currentTabId = tabId;
124
+ console.log(`[CDP] cdpKeepCurrentPage mode: using existing page as initial tab: ${tabId}, URL: ${validPage.url()}`);
125
+ } else {
126
+ throw new Error('No active pages available in CDP mode with cdpKeepCurrentPage=true (all pages are closed)');
127
+ }
128
+ } else {
129
+ throw new Error('No pages available in CDP mode with cdpKeepCurrentPage=true');
130
+ }
131
+ } else {
132
+ // Look for blank pages or create new ones
133
+ if (pages.length > 0) {
134
+ // Find one available blank page
135
+ let availablePageFound = false;
136
+ for (const page of pages) {
137
+ const pageUrl = page.url();
138
+ if (this.isBlankPageUrl(pageUrl)) {
139
+ const tabId = this.generateTabId();
140
+ this.registerNewPage(tabId, page);
141
+ this.currentTabId = tabId;
142
+ availablePageFound = true;
143
+ console.log(`[CDP] Registered blank page as initial tab: ${tabId}, URL: ${pageUrl}`);
144
+ break;
145
+ }
146
+ }
147
+
148
+ if (!availablePageFound) {
149
+ console.log('[CDP] No blank pages found, creating new page');
150
+ const newPage = await this.context.newPage();
151
+ const tabId = this.generateTabId();
152
+ this.registerNewPage(tabId, newPage);
153
+ this.currentTabId = tabId;
154
+ }
155
+ } else {
156
+ console.log('[CDP] No existing pages, creating initial page');
157
+ const newPage = await this.context.newPage();
158
+ const tabId = this.generateTabId();
159
+ this.registerNewPage(tabId, newPage);
160
+ this.currentTabId = tabId;
161
+ }
162
+ }
163
+ } else {
164
+ // Original launch logic
165
+ const launchOptions: any = {
166
+ headless: browserConfig.headless,
167
+ };
168
+
169
+ if (stealthConfig.enabled) {
170
+ launchOptions.args = stealthConfig.args || [];
171
+
172
+ // Apply stealth user agent/headers if configured
173
+ if (stealthConfig.userAgent) {
174
+ launchOptions.userAgent = stealthConfig.userAgent;
175
+ }
176
+ if (stealthConfig.extraHTTPHeaders) {
177
+ launchOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
178
+ }
179
+ }
180
+
181
+ if (browserConfig.userDataDir) {
182
+ // Ensure viewport is honored in persistent context
183
+ launchOptions.viewport = browserConfig.viewport;
184
+ this.context = await chromium.launchPersistentContext(
185
+ browserConfig.userDataDir,
186
+ launchOptions
187
+ );
188
+ this.contextOwnedByUs = true;
189
+ this.browser = this.context.browser();
190
+ const pages = this.context.pages();
191
+ if (pages.length > 0) {
192
+ const initialTabId = this.generateTabId();
193
+ this.registerNewPage(initialTabId, pages[0]);
194
+ this.currentTabId = initialTabId;
195
+ }
196
+ } else {
197
+ this.browser = await chromium.launch(launchOptions);
198
+ const contextOptions: any = {
199
+ viewport: browserConfig.viewport
200
+ };
201
+
202
+ // Apply stealth headers and UA if configured
203
+ if (stealthConfig.enabled) {
204
+ if (stealthConfig.extraHTTPHeaders) {
205
+ contextOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
206
+ }
207
+ if (stealthConfig.userAgent) {
208
+ contextOptions.userAgent = stealthConfig.userAgent;
209
+ }
210
+ }
211
+
212
+ this.context = await this.browser.newContext(contextOptions);
213
+ this.contextOwnedByUs = true;
214
+
215
+ const initialPage = await this.context.newPage();
216
+ const initialTabId = this.generateTabId();
217
+ this.registerNewPage(initialTabId, initialPage);
218
+ this.currentTabId = initialTabId;
219
+ }
220
+ }
221
+
222
+ // Set timeouts
223
+ for (const page of this.pages.values()) {
224
+ page.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
225
+ page.setDefaultTimeout(browserConfig.navigationTimeout);
226
+ }
227
+ }
228
+
229
/**
 * Produce the identifier for the next tab.
 *
 * Bumps the session's tab counter, left-pads it with zeros to the configured
 * width and prefixes it with the configured tab-id prefix.
 */
private generateTabId(): string {
  const { tabIdPrefix, tabCounterPadding } = this.configLoader.getBrowserConfig();
  this.tabCounter += 1;
  const paddedCounter = String(this.tabCounter).padStart(tabCounterPadding, '0');
  return `${tabIdPrefix}${paddedCounter}`;
}
233
+
234
/**
 * Decide whether a URL denotes a blank page.
 *
 * A URL counts as blank when it is empty, is `about:blank` (optionally
 * followed by a `?query`), is listed verbatim in the configured
 * `blankPageUrls`, or starts with the configured data-URL prefix
 * (`data:` when none is configured).
 */
private isBlankPageUrl(url: string): boolean {
  const { blankPageUrls, dataUrlPrefix } = this.configLoader.getBrowserConfig();

  // Built-in blank forms: empty string and about:blank with or without a query.
  if (url === '' || url === 'about:blank' || url.startsWith('about:blank?')) {
    return true;
  }

  // Configured blank pages are compared by exact match only.
  if (blankPageUrls.includes(url)) {
    return true;
  }

  // Data URLs are treated as blank pages as well.
  return url.startsWith(dataUrlPrefix || 'data:');
}
249
+
250
/**
 * Return the page of the current tab, recovering when no valid current tab exists.
 *
 * Recovery strategy when `currentTabId` is unset or not registered:
 * - CDP keep-current-page mode (`cdpKeepCurrentPage` and `cdpUrl` configured, context
 *   present): adopt the first open page of the context that is not registered yet;
 *   otherwise switch to the first open page that is already registered. No page is
 *   ever created in this mode.
 * - Otherwise: create a new page in the existing context and register it.
 *
 * @returns The page that is now the current tab.
 * @throws Error if CDP keep-current-page mode finds no open page, or if there is no
 *         browser context to create a page in.
 */
async getCurrentPage(): Promise<Page> {
  if (this.currentTabId) {
    const current = this.pages.get(this.currentTabId);
    if (current) {
      return current;
    }
  }

  const browserConfig = this.configLoader.getBrowserConfig();

  // In CDP keep-current-page mode, find existing page
  if (browserConfig.cdpKeepCurrentPage && browserConfig.cdpUrl && this.context) {
    const allPages = this.context.pages();
    console.log(`[getCurrentPage] cdpKeepCurrentPage mode: Looking for existing page, found ${allPages.length} pages`);

    // Reverse lookup built once (first tab id wins) instead of scanning the registry per page.
    const tabIdByPage = new Map<Page, string>();
    for (const [tabId, page] of this.pages.entries()) {
      if (!tabIdByPage.has(page)) {
        tabIdByPage.set(page, tabId);
      }
    }

    // Prefer an open page that is not tracked yet.
    for (const page of allPages) {
      if (tabIdByPage.has(page) || page.isClosed()) {
        continue;
      }
      const tabId = this.generateTabId();
      this.registerNewPage(tabId, page);
      this.currentTabId = tabId;
      // Same defaults every other registration path applies to its pages.
      page.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
      page.setDefaultTimeout(browserConfig.navigationTimeout);
      console.log(`[getCurrentPage] cdpKeepCurrentPage mode: Found and registered untracked page: ${tabId}`);
      return page;
    }

    // Every open page is tracked: fall back to the first one that is still open
    // (not merely the first entry, which may already be closed).
    for (const page of allPages) {
      if (page.isClosed()) {
        continue;
      }
      const tabId = tabIdByPage.get(page);
      if (tabId !== undefined) {
        this.currentTabId = tabId;
        console.log(`[getCurrentPage] cdpKeepCurrentPage mode: Using existing tracked page: ${tabId}`);
        return page;
      }
    }

    throw new Error('No active page available in CDP mode with cdpKeepCurrentPage=true');
  }

  // Normal mode: create new page
  if (!this.context) {
    throw new Error('No browser context available');
  }

  console.log('[getCurrentPage] No active page, creating new page');
  const newPage = await this.context.newPage();
  const tabId = this.generateTabId();
  this.registerNewPage(tabId, newPage);
  this.currentTabId = tabId;

  newPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
  newPage.setDefaultTimeout(browserConfig.navigationTimeout);

  return newPage;
}
306
+
307
/**
 * Return the console messages recorded for the current tab.
 *
 * Yields an empty array when there is no current tab or no log buffer is
 * registered for it.
 */
async getCurrentLogs(): Promise<ConsoleMessage[]> {
  const tabId = this.currentTabId;
  if (tabId && this.consoleLogs.has(tabId)) {
    return this.consoleLogs.get(tabId) || [];
  }
  return [];
}
313
+
314
/**
 * Get current scroll position from the page.
 *
 * Reads the scroll offsets inside the current page, caches them in
 * `this.scrollPosition` and returns them. If the page cannot be obtained or the
 * evaluation fails, a warning is logged and the previously cached position is
 * returned instead.
 *
 * @returns The horizontal (`x`) and vertical (`y`) scroll offsets.
 */
private async getCurrentScrollPosition(): Promise<{ x: number; y: number }> {
  try {
    const page = await this.getCurrentPage();
    // Only the two offsets are transferred back from the page; nothing else is consumed here.
    const { x, y } = await page.evaluate(() => {
      return {
        x: window.pageXOffset || document.documentElement.scrollLeft || 0,
        y: window.pageYOffset || document.documentElement.scrollTop || 0,
      };
    }) as { x: number; y: number };

    // Store scroll position
    this.scrollPosition = { x, y };
    return this.scrollPosition;
  } catch (error) {
    console.warn('Failed to get scroll position:', error);
    return this.scrollPosition;
  }
}
337
+
338
/**
 * Capture a snapshot of the current page for AI consumption.
 *
 * Thin public entry point: all work is delegated to the native Playwright
 * mapping implementation.
 *
 * @param includeCoordinates Forwarded unchanged to the native implementation.
 * @param viewportLimit Forwarded unchanged to the native implementation.
 */
async getSnapshotForAI(includeCoordinates = false, viewportLimit = false): Promise<SnapshotResult & { timing: DetailedTiming }> {
  const nativeResult = await this.getSnapshotForAINative(includeCoordinates, viewportLimit);
  return nativeResult;
}
342
+
343
+
344
/**
 * Build a ref -> role lookup from snapshot text.
 *
 * Each line is scanned for its first `[ref=...]` marker; lines without one are
 * ignored. The role is the first word of the line after an optional list dash and
 * an optional opening quote, lower-cased. The optional quote matters for lines
 * that are quoted as a whole (e.g. `- 'button "a: b" [ref=e1]'`): without it the
 * pattern would backtrack and report the list dash itself as the role.
 *
 * @param snapshotText Snapshot text, one element per line.
 * @returns Map from ref to role; the role is `undefined` when the line does not
 *          start with a recognisable role word. If a ref occurs on several lines
 *          the last line wins.
 */
private buildSnapshotIndex(snapshotText: string): Map<string, string|undefined> {
  const index = new Map<string, string|undefined>();
  const refRe = /\[ref=([^\]]+)\]/i;
  // Role words must start with a letter so that a bare "-" can never be captured.
  const roleRe = /^\s*-?\s*['"]?([a-z][a-z0-9_-]*)/i;

  for (const line of snapshotText.split('\n')) {
    const refMatch = line.match(refRe);
    if (!refMatch) continue;

    const roleMatch = line.match(roleRe);
    index.set(refMatch[1], roleMatch ? roleMatch[1].toLowerCase() : undefined);
  }
  return index;
}
358
+
359
+
360
/**
 * Keep only the elements whose bounding box overlaps the viewport.
 *
 * Coordinates are compared directly against the rectangle
 * `(0, 0) - (viewport.width, viewport.height)`, i.e. they are treated as
 * viewport-relative, so `scrollPos` is accepted but not consulted. Elements
 * that carry no coordinates are always kept.
 *
 * @param elements Elements keyed by ref.
 * @param viewport Viewport size in pixels.
 * @param scrollPos Current scroll offsets (unused by the comparison).
 * @returns A new record containing the elements that passed the check.
 */
private filterElementsInViewport(
  elements: Record<string, SnapshotElement>,
  viewport: { width: number, height: number },
  scrollPos: { x: number, y: number }
): Record<string, SnapshotElement> {
  const visible: Record<string, SnapshotElement> = {};

  for (const ref of Object.keys(elements)) {
    const element = elements[ref];
    const box = element.coordinates;

    if (box) {
      const rightEdge = box.x + box.width;
      const bottomEdge = box.y + box.height;
      // Rectangles overlap when each one starts before the other one ends.
      const overlapsViewport =
        box.x < viewport.width &&
        box.y < viewport.height &&
        rightEdge > 0 &&
        bottomEdge > 0;
      if (!overlapsViewport) {
        continue;
      }
    }

    // Either no coordinates (fallback: keep) or the box overlaps the viewport.
    visible[ref] = element;
  }

  return visible;
}
399
+
400
+
401
/**
 * Reduce snapshot lines to those belonging to the given refs plus nearby context.
 *
 * A stack records the indentation levels of the ref lines kept so far. For each line:
 * - its level is `leading whitespace length / tabSize`;
 * - the difference to the level on top of the stack (0 when the stack is empty) is
 *   computed before the stack is unwound;
 * - a line whose ref is in `viewportRefs` is kept and its level pushed;
 * - a line without any ref is kept when that difference is exactly 0 or 1;
 * - a line whose ref is not in `viewportRefs` is dropped.
 *
 * @param lines Snapshot lines in document order.
 * @param viewportRefs Refs that must be kept.
 * @param tabSize Number of whitespace characters per indentation level.
 * @returns The kept lines, in their original order.
 */
private filterSnapshotLines(
  lines: string[],
  viewportRefs: Set<string>,
  tabSize: number = 2
): string[] {
  const refRe = /\[ref=([^\]]+)\]/;
  const indentRe = /^(\s*)/;
  const trackedLevels: number[] = [];
  const kept: string[] = [];

  for (const line of lines) {
    const indent = line.match(indentRe);
    const depth = indent ? indent[1].length / tabSize : 0;
    const parentDepth = trackedLevels.length > 0 ? trackedLevels[trackedLevels.length - 1] : 0;
    // Deliberately measured against the stack top as it was BEFORE unwinding.
    const delta = depth - parentDepth;

    // Same level or shallower: unwind everything at this depth or deeper.
    if (delta <= 0) {
      while (trackedLevels.length > 0 && trackedLevels[trackedLevels.length - 1] >= depth) {
        trackedLevels.pop();
      }
    }

    const refMatch = line.match(refRe);
    if (refMatch === null) {
      // Ref-less line: keep it as a sibling/header (0) or direct child (1).
      if (delta === 0 || delta === 1) {
        kept.push(line);
      }
      continue;
    }

    if (viewportRefs.has(refMatch[1])) {
      trackedLevels.push(depth);
      kept.push(line);
    }
  }

  return kept;
}
438
+
439
/**
 * Rebuild snapshot text so that it only covers the given elements.
 *
 * Splits the original snapshot into lines, filters them by the refs that are
 * keys of `filteredElements`, and joins the surviving lines with newlines.
 *
 * @param originalSnapshot Full snapshot text.
 * @param filteredElements Elements (keyed by ref) that should remain.
 * @returns The filtered snapshot text.
 */
private rebuildSnapshotText(
  originalSnapshot: string,
  filteredElements: Record<string, SnapshotElement>): string {
  const keptRefs = new Set(Object.keys(filteredElements));
  return this.filterSnapshotLines(originalSnapshot.split('\n'), keptRefs).join('\n');
}
447
+
448
+
449
/**
 * Capture a snapshot of the current page together with a ref -> element mapping.
 *
 * Steps:
 * 1. take the snapshot text via `getSnapshot(page)`;
 * 2. collect every `[ref=...]` in the text and look up its role (`'unknown'` when
 *    no role could be parsed);
 * 3. when coordinates are needed, resolve each ref with the `aria-ref=` selector and
 *    attach its rounded bounding box; failures for a single ref are logged and skipped;
 * 4. when `viewportLimit` is set, drop elements outside the viewport and trim the
 *    snapshot text accordingly.
 *
 * Viewport filtering relies on bounding boxes, so boxes are gathered whenever
 * `viewportLimit` is set, even if `includeCoordinates` is false; in that case they are
 * removed again before returning so the result shape only depends on `includeCoordinates`.
 *
 * Any error after the page has been obtained is logged and reported as an error
 * snapshot with empty elements instead of being thrown.
 *
 * @param includeCoordinates Attach `coordinates` to each returned element.
 * @param viewportLimit Restrict the result to elements overlapping the viewport.
 * @returns Snapshot text, element mapping, metadata and timing information.
 */
private async getSnapshotForAINative(includeCoordinates = false, viewportLimit = false): Promise<SnapshotResult & { timing: DetailedTiming }> {
  const startTime = Date.now();
  const page = await this.getCurrentPage();

  try {
    const snapshotStart = Date.now();
    const snapshotText = await this.getSnapshot(page);
    const snapshotTime = Date.now() - snapshotStart;

    const mappingStart = Date.now();

    // Seed the mapping with ref + role for every ref mentioned in the snapshot.
    // Keying by ref de-duplicates refs that appear more than once.
    const snapshotIndex = this.buildSnapshotIndex(snapshotText);
    const playwrightMapping: Record<string, any> = {};
    const refPattern = /\[ref=([^\]]+)\]/g;
    let match: RegExpExecArray | null;
    while ((match = refPattern.exec(snapshotText)) !== null) {
      const ref = match[1];
      playwrightMapping[ref] = {
        ref,
        role: snapshotIndex.get(ref) || 'unknown',
      };
    }

    // The viewport filter keeps every element that has no coordinates, so it can
    // only do its job if bounding boxes were collected first.
    const needCoordinates = includeCoordinates || viewportLimit;
    if (needCoordinates) {
      for (const ref of Object.keys(playwrightMapping)) {
        try {
          const element = page.locator(`aria-ref=${ref}`).first();
          if ((await element.count()) === 0) {
            continue;
          }

          const boundingBox = await element.boundingBox();
          if (boundingBox) {
            playwrightMapping[ref] = {
              ...playwrightMapping[ref],
              coordinates: {
                x: Math.round(boundingBox.x),
                y: Math.round(boundingBox.y),
                width: Math.round(boundingBox.width),
                height: Math.round(boundingBox.height)
              }
            };
          }
        } catch (error) {
          console.warn(`Failed to get coordinates for ref ${ref}:`, error);
        }
      }
    }

    const mappingTime = Date.now() - mappingStart;

    // Apply viewport filtering if requested
    let finalElements = playwrightMapping;
    let finalSnapshot = snapshotText;

    if (viewportLimit) {
      // Falls back to 1280x720 when the page reports no viewport size.
      const viewport = page.viewportSize() || { width: 1280, height: 720 };
      const scrollPos = await this.getCurrentScrollPosition();
      finalElements = this.filterElementsInViewport(playwrightMapping, viewport, scrollPos);
      finalSnapshot = this.rebuildSnapshotText(snapshotText, finalElements);

      if (!includeCoordinates) {
        // Boxes were gathered only for filtering; the caller did not ask for them.
        for (const ref of Object.keys(finalElements)) {
          delete finalElements[ref].coordinates;
        }
      }
    }

    const totalTime = Date.now() - startTime;

    return {
      snapshot: finalSnapshot,
      elements: finalElements,
      metadata: {
        elementCount: Object.keys(finalElements).length,
        url: page.url(),
        timestamp: new Date().toISOString(),
      },
      timing: {
        total_time_ms: totalTime,
        snapshot_time_ms: snapshotTime,
        coordinate_enrichment_time_ms: 0, // Integrated into mapping
        aria_mapping_time_ms: mappingTime,
      },
    };
  } catch (error) {
    console.error('Failed to get AI snapshot with native mapping:', error);
    const totalTime = Date.now() - startTime;

    return {
      snapshot: 'Error: Unable to capture page snapshot',
      elements: {},
      metadata: {
        elementCount: 0,
        url: page.url(),
        timestamp: new Date().toISOString(),
      },
      timing: {
        total_time_ms: totalTime,
        snapshot_time_ms: 0,
        coordinate_enrichment_time_ms: 0,
        aria_mapping_time_ms: 0,
      },
    };
  }
}
563
+
564
+
565
+
566
/**
 * Enhanced click implementation with new tab detection and scroll fix.
 *
 * Behaviour:
 * - The element is located through the `aria-ref=` selector after refreshing the snapshot.
 * - Elements that open a new tab by themselves, and every navigable `<a>` (which is forced
 *   to use the configured blank target), are clicked while waiting for a new page in the
 *   browser context. If a page appears it is registered, given the default timeouts, made
 *   the current tab and brought to the front.
 * - For comboboxes and textboxes clicked in place, the snapshot taken before the click is
 *   compared with one taken 300 ms after it; newly appeared `option`/`menuitem` lines are
 *   returned as `diffSnapshot`, for comboboxes prefixed with the combobox's own line.
 *
 * @param page Page that contains the element.
 * @param ref Aria ref of the element to click.
 * @returns `success` plus, depending on the path taken, the `method` used, the `newTabId`
 *          of an opened tab, a `diffSnapshot`, or an `error` message. Never throws:
 *          exceptions are logged and reported through `error`.
 */
private async performClick(page: Page, ref: string): Promise<{ success: boolean; method?: string; error?: string; newTabId?: string; diffSnapshot?: string }> {
  try {
    // Ensure we have the latest snapshot and mapping
    await this.getSnapshot(page);

    const browserConfig = this.configLoader.getBrowserConfig();

    // Use Playwright's aria-ref selector engine
    const element = page.locator(`aria-ref=${ref}`).first();
    if ((await element.count()) === 0) {
      return { success: false, error: `Element with ref ${ref} not found` };
    }

    const role = await element.getAttribute('role');
    // Evaluated once and reused for both the widget checks and the link checks below.
    const tagName = await element.evaluate(el => el.tagName.toLowerCase());
    const isCombobox = role === 'combobox' || tagName === 'combobox';
    const isTextbox = role === 'textbox' || tagName === 'input' || tagName === 'textarea';
    const shouldCheckDiff = isCombobox || isTextbox;

    let snapshotBefore: string | null = null;
    let comboboxAriaLabel: string | null = null;
    if (shouldCheckDiff) {
      snapshotBefore = await this.getSnapshot(page);
      // Capture aria-label for combobox to find it again after click (ref may change)
      if (isCombobox) {
        comboboxAriaLabel = await element.getAttribute('aria-label');
        if (!comboboxAriaLabel) {
          // Try to get accessible name from aria-labelledby or inner text
          comboboxAriaLabel = await element.evaluate(el => {
            const labelledBy = el.getAttribute('aria-labelledby');
            if (labelledBy) {
              const labelEl = document.getElementById(labelledBy);
              if (labelEl) return labelEl.textContent?.trim() || null;
            }
            return el.textContent?.trim() || null;
          });
        }
      }
    }

    // Check element properties
    const target = await element.getAttribute(browserConfig.targetAttribute);
    const href = await element.getAttribute(browserConfig.hrefAttribute);
    const onclick = await element.getAttribute(browserConfig.onclickAttribute);

    // Check if element naturally opens new tab
    const naturallyOpensNewTab = (
      target === browserConfig.blankTarget ||
      (onclick && onclick.includes(browserConfig.windowOpenString)) ||
      (tagName === 'a' && href && (href.includes(`javascript:${browserConfig.windowOpenString}`) || href.includes(browserConfig.blankTarget)))
    );

    // Open ALL links in new tabs
    // Check if this is a navigable link
    const isNavigableLink = tagName === 'a' && href &&
      !href.startsWith(browserConfig.anchorOnly) && // Not an anchor link
      !href.startsWith(browserConfig.javascriptVoidPrefix) && // Not a void javascript
      href !== browserConfig.javascriptVoidEmpty && // Not empty javascript
      href !== browserConfig.anchorOnly; // Not just #

    const shouldOpenNewTab = naturallyOpensNewTab || isNavigableLink;

    if (shouldOpenNewTab) {
      // If it's a link that doesn't naturally open in new tab, force it
      if (isNavigableLink && !naturallyOpensNewTab) {
        await element.evaluate((el, blankTarget) => {
          if (el.tagName.toLowerCase() === 'a') {
            el.setAttribute('target', blankTarget);
          }
        }, browserConfig.blankTarget);
      }

      // Set up popup listener before clicking. The rejection (timeout / closed context)
      // is absorbed right here: if the click below throws, nobody would await this
      // promise and its later rejection would surface as an unhandled rejection.
      const popupPromise: Promise<Page | null> = page.context()
        .waitForEvent('page', { timeout: browserConfig.popupTimeout })
        .catch(() => null);

      // Click with force to avoid scrolling issues
      await element.click({ force: browserConfig.forceClick });

      // Wait for new page to open
      const newPage = await popupPromise;
      if (!newPage) {
        // No new page appeared in time: the click itself still went through.
        return { success: true, method: 'playwright-aria-ref' };
      }

      try {
        // Generate tab ID for the new page
        const newTabId = this.generateTabId();
        this.registerNewPage(newTabId, newPage);

        // Set up page properties
        newPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
        newPage.setDefaultTimeout(browserConfig.navigationTimeout);

        // Automatically switch to the new tab
        this.currentTabId = newTabId;
        await newPage.bringToFront();

        // Wait for new page to be ready; a slow load is not treated as a failure
        await newPage.waitForLoadState('domcontentloaded', { timeout: browserConfig.popupTimeout }).catch(() => {});

        return { success: true, method: 'playwright-aria-ref-newtab', newTabId };
      } catch (popupError) {
        return { success: true, method: 'playwright-aria-ref' };
      }
    }

    // In-place click; force option prevents scrolling issues
    await element.click({ force: browserConfig.forceClick });

    if (shouldCheckDiff && snapshotBefore) {
      // Give dropdown content a moment to render before diffing.
      await page.waitForTimeout(300);
      const snapshotAfter = await this.getSnapshot(page);
      let diffSnapshot = this.getSnapshotDiff(snapshotBefore, snapshotAfter, ['option', 'menuitem']);

      // For combobox, find the new ref based on aria-label and prepend to diffSnapshot
      if (isCombobox && comboboxAriaLabel) {
        const newComboboxRef = this.findComboboxRefByAriaLabel(snapshotAfter, comboboxAriaLabel);
        if (newComboboxRef) {
          // Find the full line for this combobox in the snapshot
          const comboboxLine = this.findSnapshotLineByRef(snapshotAfter, newComboboxRef);
          if (comboboxLine) {
            diffSnapshot = comboboxLine + (diffSnapshot ? '\n' + diffSnapshot : '');
          }
        }
      }

      if (diffSnapshot && diffSnapshot.trim() !== '') {
        return { success: true, method: 'playwright-aria-ref', diffSnapshot };
      }
    }

    return { success: true, method: 'playwright-aria-ref' };

  } catch (error) {
    console.error('[performClick] Exception during click for ref: %s', ref, error);
    return { success: false, error: `Click failed with exception: ${error}` };
  }
}
715
+
716
/**
 * Extract diff between two snapshots, returning only new elements of specified types.
 *
 * A line of `snapshotAfter` is reported when its first `[ref=...]` does not occur anywhere
 * in `snapshotBefore` and its role is one of `targetRoles` (compared case-insensitively).
 * The role is the leading word of the line, after an optional list dash and an optional
 * opening quote, so text inside an element's name (e.g. `button "Select an option"`)
 * cannot make the element pass as an `option`.
 *
 * @param snapshotBefore Snapshot text taken before the action.
 * @param snapshotAfter Snapshot text taken after the action.
 * @param targetRoles Roles to report.
 * @returns The matching lines, trimmed and joined by newlines; `''` when there are none.
 */
private getSnapshotDiff(snapshotBefore: string, snapshotAfter: string, targetRoles: string[]): string {
  const refsBefore = new Set<string>();
  const refPattern = /\[ref=([^\]]+)\]/g;
  let match: RegExpExecArray | null;
  while ((match = refPattern.exec(snapshotBefore)) !== null) {
    refsBefore.add(match[1]);
  }

  // Plain set lookup: nothing caller-supplied is interpolated into a RegExp,
  // and nothing is rebuilt per line.
  const wantedRoles = new Set(targetRoles.map(role => role.toLowerCase()));
  const lineRefPattern = /\[ref=([^\]]+)\]/;
  const rolePattern = /^\s*-?\s*['"]?([a-z][a-z0-9_-]*)/i;

  const newElements: string[] = [];
  for (const line of snapshotAfter.split('\n')) {
    const refMatch = line.match(lineRefPattern);
    if (!refMatch || refsBefore.has(refMatch[1])) {
      continue;
    }

    const roleMatch = line.match(rolePattern);
    if (roleMatch && wantedRoles.has(roleMatch[1].toLowerCase())) {
      newElements.push(line.trim());
    }
  }

  return newElements.join('\n');
}
750
+
751
/**
 * Find a combobox ref in the snapshot by its aria-label or expanded state.
 *
 * Preference order:
 * 1. the first line that mentions `combobox`, carries a `[ref=...]` and contains the
 *    label (either quoted, case-insensitively, or as a plain substring);
 * 2. otherwise the first line that mentions `combobox`, carries a `[ref=...]` and is
 *    marked `[expanded]`.
 *
 * @param snapshot Snapshot text to search.
 * @param ariaLabel Label of the combobox being looked for.
 * @returns The ref of the matching line, or `null` when neither rule matches.
 */
private findComboboxRefByAriaLabel(snapshot: string, ariaLabel: string): string | null {
  // Escape special regex characters in ariaLabel
  const escapedLabel = ariaLabel.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const quotedLabelRe = new RegExp(`["']${escapedLabel}["']`, 'i');
  const comboboxRe = /\bcombobox\b/i;
  const expandedRe = /\[expanded\]/i;
  const refRe = /\[ref=([^\]]+)\]/;

  // Remembered while scanning; only used if no label match turns up at all.
  let expandedFallback: string | null = null;

  for (const line of snapshot.split('\n')) {
    if (!comboboxRe.test(line)) {
      continue;
    }
    const refMatch = line.match(refRe);
    if (!refMatch) {
      continue;
    }

    if (quotedLabelRe.test(line) || line.includes(ariaLabel)) {
      return refMatch[1];
    }

    if (expandedFallback === null && expandedRe.test(line)) {
      expandedFallback = refMatch[1];
    }
  }

  return expandedFallback;
}
788
+
789
/**
 * Find the full snapshot line for a given ref.
 *
 * @param snapshot Snapshot text to search.
 * @param ref Ref to look for, without the surrounding `[ref=` and `]`.
 * @returns The first line containing `[ref=<ref>]`, trimmed, or `null` if no line contains it.
 */
private findSnapshotLineByRef(snapshot: string, ref: string): string | null {
  const marker = `[ref=${ref}]`;
  const hit = snapshot.split('\n').find(candidate => candidate.includes(marker));
  return hit === undefined ? null : hit.trim();
}
801
+
802
+ /**
803
+ * Simplified type implementation using Playwright's aria-ref selector
804
+ * Supports both single and multiple input operations
805
+ */
806
+ private async performType(page: Page, ref: string | undefined, text: string | undefined, inputs?: Array<{ ref: string; text: string }>): Promise<{ success: boolean; error?: string; details?: Record<string, any>; diffSnapshot?: string }> {
807
+ try {
808
+ // Ensure we have the latest snapshot
809
+ await this.getSnapshot(page);
810
+
811
+ // Handle multiple inputs if provided
812
+ if (inputs && inputs.length > 0) {
813
+ const results: Record<string, { success: boolean; error?: string }> = {};
814
+
815
+ for (const input of inputs) {
816
+ const singleResult = await this.performType(page, input.ref, input.text);
817
+ results[input.ref] = {
818
+ success: singleResult.success,
819
+ error: singleResult.error
820
+ };
821
+ }
822
+
823
+ // Check if all inputs were successful
824
+ const allSuccess = Object.values(results).every(r => r.success);
825
+ const errors = Object.entries(results)
826
+ .filter(([_, r]) => !r.success)
827
+ .map(([ref, r]) => `${ref}: ${r.error}`)
828
+ .join('; ');
829
+
830
+ return {
831
+ success: allSuccess,
832
+ error: allSuccess ? undefined : `Some inputs failed: ${errors}`,
833
+ details: results
834
+ };
835
+ }
836
+
837
+ // Handle single input (backward compatibility)
838
+ if (ref && text !== undefined) {
839
+ const selector = `aria-ref=${ref}`;
840
+ const element = await page.locator(selector).first();
841
+
842
+ const exists = await element.count() > 0;
843
+ if (!exists) {
844
+ return { success: false, error: `Element with ref ${ref} not found` };
845
+ }
846
+
847
+ // Get element attributes to check if it's readonly or a special input type
848
+ let originalPlaceholder: string | null = null;
849
+ let isReadonly = false;
850
+ let elementType: string | null = null;
851
+ let isCombobox = false;
852
+ let isTextbox = false;
853
+ let shouldCheckDiff = false;
854
+
855
+ try {
856
+ // Get element info in one evaluation to minimize interactions
857
+ const elementInfo = await element.evaluate((el: any) => {
858
+ return {
859
+ placeholder: el.placeholder || null,
860
+ readonly: el.readOnly || el.hasAttribute('readonly'),
861
+ type: el.type || null,
862
+ tagName: el.tagName.toLowerCase(),
863
+ disabled: el.disabled || false,
864
+ role: el.getAttribute('role'),
865
+ ariaHaspopup: el.getAttribute('aria-haspopup')
866
+ };
867
+ });
868
+
869
+ originalPlaceholder = elementInfo.placeholder;
870
+ isReadonly = elementInfo.readonly;
871
+ elementType = elementInfo.type;
872
+ isCombobox = elementInfo.role === 'combobox' ||
873
+ elementInfo.tagName === 'combobox' ||
874
+ elementInfo.ariaHaspopup === 'listbox';
875
+ isTextbox = elementInfo.role === 'textbox' ||
876
+ elementInfo.tagName === 'input' ||
877
+ elementInfo.tagName === 'textarea';
878
+ shouldCheckDiff = isCombobox || isTextbox;
879
+
880
+ } catch (e) {
881
+ console.log(`Warning: Failed to get element attributes: ${e}`);
882
+ }
883
+
884
+ // Get snapshot before action to record existing elements
885
+ const snapshotBefore = await this.getSnapshot(page);
886
+ const existingRefs = new Set<string>();
887
+ const refPattern = /\[ref=([^\]]+)\]/g;
888
+ let match;
889
+ while ((match = refPattern.exec(snapshotBefore)) !== null) {
890
+ existingRefs.add(match[1]);
891
+ }
892
+ console.log(`Found ${existingRefs.size} total elements before action`);
893
+
894
+ // If element is readonly or a date/time input, skip fill attempt and go directly to click
895
+ if (isReadonly || ['date', 'datetime-local', 'time'].includes(elementType || '')) {
896
+ console.log(`Element ref=${ref} is readonly or date/time input, skipping direct fill attempt`);
897
+
898
+ // Click with force option to avoid scrolling
899
+ try {
900
+ await element.click({ force: true });
901
+ console.log(`Clicked readonly/special element ref=${ref} to trigger dynamic content`);
902
+ // Wait for potential dynamic content to appear
903
+ await page.waitForTimeout(500);
904
+ } catch (clickError) {
905
+ console.log(`Warning: Failed to click element: ${clickError}`);
906
+ }
907
+ } else {
908
+ // Try to fill the element, with fallback to click-then-fill strategy
909
+ let alreadyClicked = false;
910
+ try {
911
+ let fillSuccess = false;
912
+
913
+ try {
914
+ // Strategy 1: Try to fill directly without clicking (for modern inputs like Google Flights combobox)
915
+ await element.fill(text, { timeout: 3000, force: true });
916
+ fillSuccess = true;
917
+ console.log(`Filled element ref=${ref} directly without clicking`);
918
+ } catch (directFillError) {
919
+ // Strategy 2: Click first, then fill (for traditional inputs that need activation)
920
+ console.log(`Direct fill failed for ref=${ref}, trying click-then-fill strategy`);
921
+ try {
922
+ await element.click({ force: true });
923
+ alreadyClicked = true;
924
+ console.log(`Clicked element ref=${ref} before typing`);
925
+ } catch (clickError) {
926
+ console.log(`Warning: Failed to click element before typing: ${clickError}`);
927
+ }
928
+
929
+ try {
930
+ await element.fill(text, { timeout: 3000, force: true });
931
+ fillSuccess = true;
932
+ console.log(`Filled element ref=${ref} after clicking`);
933
+ } catch (secondFillError) {
934
+ // Will be handled by outer catch block below
935
+ throw secondFillError;
936
+ }
937
+ }
938
+
939
+ if (fillSuccess) {
940
+ // If this element might show dropdown, wait and check for new elements
941
+ if (shouldCheckDiff) {
942
+ await page.waitForTimeout(300);
943
+ const snapshotAfter = await this.getSnapshot(page);
944
+ const diffSnapshot = this.getSnapshotDiff(snapshotBefore, snapshotAfter, ['option', 'menuitem']);
945
+
946
+ if (diffSnapshot && diffSnapshot.trim() !== '') {
947
+ return { success: true, diffSnapshot };
948
+ }
949
+ }
950
+
951
+ return { success: true };
952
+ }
953
+ } catch (fillError: any) {
954
+ // Log the error for debugging
955
+ console.log(`Fill error for ref ${ref}: ${fillError.message}`);
956
+
957
+ // Check for various error messages that indicate the element is not fillable
958
+ const errorMessage = fillError.message.toLowerCase();
959
+ if (errorMessage.includes('not an <input>') ||
960
+ errorMessage.includes('not have a role allowing') ||
961
+ errorMessage.includes('element is not') ||
962
+ errorMessage.includes('cannot type') ||
963
+ errorMessage.includes('readonly') ||
964
+ errorMessage.includes('not editable') ||
965
+ errorMessage.includes('timeout') ||
966
+ errorMessage.includes('timeouterror')) {
967
+
968
+ // Click the element again to trigger dynamic content (like date pickers), but only if we haven't clicked yet
969
+ if (!alreadyClicked) {
970
+ try {
971
+ await element.click({ force: true });
972
+ console.log(`Clicked element ref=${ref} to trigger dynamic content`);
973
+ // Wait for potential dynamic content to appear
974
+ await page.waitForTimeout(500);
975
+ } catch (clickError) {
976
+ console.log(`Warning: Failed to click element to trigger dynamic content: ${clickError}`);
977
+ }
978
+ } else {
979
+ // We already clicked during the click-then-fill strategy
980
+ await page.waitForTimeout(500);
981
+ }
982
+
983
+ // Step 1: Try to find input elements within the clicked element
984
+ const inputSelector = `input:visible, textarea:visible, [contenteditable="true"]:visible, [role="textbox"]:visible`;
985
+ const inputElement = await element.locator(inputSelector).first();
986
+
987
+ const inputExists = await inputElement.count() > 0;
988
+ if (inputExists) {
989
+ console.log(`Found input element within ref ${ref}, attempting to fill`);
990
+ try {
991
+ await inputElement.fill(text, { force: true });
992
+
993
+ // If element might show dropdown, check for new elements
994
+ if (shouldCheckDiff) {
995
+ await page.waitForTimeout(300);
996
+ const snapshotFinal = await this.getSnapshot(page);
997
+ const diffSnapshot = this.getSnapshotDiff(snapshotBefore, snapshotFinal, ['option', 'menuitem']);
998
+
999
+ if (diffSnapshot && diffSnapshot.trim() !== '') {
1000
+ return { success: true, diffSnapshot };
1001
+ }
1002
+ }
1003
+
1004
+ return { success: true };
1005
+ } catch (innerError) {
1006
+ console.log(`Failed to fill child element: ${innerError}`);
1007
+ }
1008
+ }
1009
+
1010
+ // Step 2: Look for new elements that appeared after the action
1011
+ console.log(`Looking for new elements that appeared after action...`);
1012
+
1013
+ // Get snapshot after action to find new elements
1014
+ const snapshotAfter = await this.getSnapshot(page);
1015
+ const newRefs = new Set<string>();
1016
+ const afterRefPattern = /\[ref=([^\]]+)\]/g;
1017
+ let afterMatch;
1018
+ while ((afterMatch = afterRefPattern.exec(snapshotAfter)) !== null) {
1019
+ const refId = afterMatch[1];
1020
+ if (!existingRefs.has(refId)) {
1021
+ newRefs.add(refId);
1022
+ }
1023
+ }
1024
+
1025
+ console.log(`Found ${newRefs.size} new elements after action`);
1026
+
1027
+ // If we have a placeholder, try to find new input elements with that placeholder
1028
+ if (originalPlaceholder && newRefs.size > 0) {
1029
+ console.log(`Looking for new input elements with placeholder: ${originalPlaceholder}`);
1030
+
1031
+ // Try each new ref to see if it's an input with our placeholder
1032
+ for (const newRef of newRefs) {
1033
+ try {
1034
+ const newElement = await page.locator(`aria-ref=${newRef}`).first();
1035
+ const tagName = await newElement.evaluate(el => el.tagName.toLowerCase()).catch(() => null);
1036
+
1037
+ if (tagName === 'input' || tagName === 'textarea') {
1038
+ const placeholder = await newElement.getAttribute('placeholder').catch(() => null);
1039
+ if (placeholder === originalPlaceholder) {
1040
+ console.log(`Found new input element with matching placeholder: ref=${newRef}`);
1041
+
1042
+ // Check if it's visible and fillable
1043
+ const elementInfo = await newElement.evaluate((el: any) => {
1044
+ return {
1045
+ tagName: el.tagName,
1046
+ id: el.id,
1047
+ className: el.className,
1048
+ placeholder: el.placeholder,
1049
+ isVisible: el.offsetParent !== null,
1050
+ isReadonly: el.readOnly || el.getAttribute('readonly') !== null
1051
+ };
1052
+ });
1053
+ console.log(`New element details:`, JSON.stringify(elementInfo));
1054
+
1055
+ // Try to fill it with force to avoid scrolling
1056
+ await newElement.fill(text, { force: true });
1057
+
1058
+ // If element might show dropdown, check for new elements
1059
+ if (shouldCheckDiff) {
1060
+ await page.waitForTimeout(300);
1061
+ const snapshotFinal = await this.getSnapshot(page);
1062
+ const diffSnapshot = this.getSnapshotDiff(snapshotBefore, snapshotFinal, ['option', 'menuitem']);
1063
+
1064
+ if (diffSnapshot && diffSnapshot.trim() !== '') {
1065
+ return { success: true, diffSnapshot };
1066
+ }
1067
+ }
1068
+
1069
+ return { success: true };
1070
+ }
1071
+ }
1072
+ } catch (e) {
1073
+ // Ignore errors for non-input elements
1074
+ }
1075
+ }
1076
+ }
1077
+
1078
+ console.log(`No suitable input element found for ref ${ref}`);
1079
+ }
1080
+ // Re-throw the original error if we couldn't find an input element
1081
+ throw fillError;
1082
+ }
1083
+ }
1084
+
1085
+ // If we skipped the fill attempt (readonly elements), look for new elements directly
1086
+ if (isReadonly || ['date', 'datetime-local', 'time'].includes(elementType || '')) {
1087
+ // Look for new elements that appeared after clicking
1088
+ console.log(`Looking for new elements that appeared after clicking readonly element...`);
1089
+
1090
+ // Get snapshot after action to find new elements
1091
+ const snapshotAfter = await this.getSnapshot(page);
1092
+ const newRefs = new Set<string>();
1093
+ const afterRefPattern = /\[ref=([^\]]+)\]/g;
1094
+ let afterMatch;
1095
+ while ((afterMatch = afterRefPattern.exec(snapshotAfter)) !== null) {
1096
+ const refId = afterMatch[1];
1097
+ if (!existingRefs.has(refId)) {
1098
+ newRefs.add(refId);
1099
+ }
1100
+ }
1101
+
1102
+ console.log(`Found ${newRefs.size} new elements after clicking readonly element`);
1103
+
1104
+ // If we have a placeholder, try to find new input elements with that placeholder
1105
+ if (originalPlaceholder && newRefs.size > 0) {
1106
+ console.log(`Looking for new input elements with placeholder: ${originalPlaceholder}`);
1107
+
1108
+ // Try each new ref to see if it's an input with our placeholder
1109
+ for (const newRef of newRefs) {
1110
+ try {
1111
+ const newElement = await page.locator(`aria-ref=${newRef}`).first();
1112
+ const tagName = await newElement.evaluate(el => el.tagName.toLowerCase()).catch(() => null);
1113
+
1114
+ if (tagName === 'input' || tagName === 'textarea') {
1115
+ const placeholder = await newElement.getAttribute('placeholder').catch(() => null);
1116
+ if (placeholder === originalPlaceholder) {
1117
+ console.log(`Found new input element with matching placeholder: ref=${newRef}`);
1118
+
1119
+ // Check if it's visible and fillable
1120
+ const elementInfo = await newElement.evaluate((el: any) => {
1121
+ return {
1122
+ tagName: el.tagName,
1123
+ id: el.id,
1124
+ className: el.className,
1125
+ placeholder: el.placeholder,
1126
+ isVisible: el.offsetParent !== null,
1127
+ isReadonly: el.readOnly || el.getAttribute('readonly') !== null
1128
+ };
1129
+ });
1130
+ console.log(`New element details:`, JSON.stringify(elementInfo));
1131
+
1132
+ // Try to fill it with force to avoid scrolling
1133
+ await newElement.fill(text, { force: true });
1134
+
1135
+ // If element might show dropdown, check for new elements
1136
+ if (shouldCheckDiff) {
1137
+ await page.waitForTimeout(300);
1138
+ const snapshotFinal = await this.getSnapshot(page);
1139
+ const diffSnapshot = this.getSnapshotDiff(snapshotBefore, snapshotFinal, ['option', 'menuitem']);
1140
+
1141
+ if (diffSnapshot && diffSnapshot.trim() !== '') {
1142
+ return { success: true, diffSnapshot };
1143
+ }
1144
+ }
1145
+
1146
+ return { success: true };
1147
+ }
1148
+ }
1149
+ } catch (e) {
1150
+ // Ignore errors for non-input elements
1151
+ }
1152
+ }
1153
+ }
1154
+
1155
+ console.log(`No suitable input element found for readonly ref ${ref}`);
1156
+ return { success: false, error: `Element ref=${ref} is readonly and no suitable input was found` };
1157
+ }
1158
+ }
1159
+
1160
+ return { success: false, error: 'No valid input provided' };
1161
+ } catch (error) {
1162
+ return { success: false, error: `Type failed: ${error}` };
1163
+ }
1164
+ }
1165
+
1166
/**
 * Choose an option on the element addressed by an aria-ref id.
 *
 * @param page  Playwright page that owns the element.
 * @param ref   Reference id, resolved through the `aria-ref=` selector engine.
 * @param value Option passed straight to `Locator.selectOption`.
 * @returns `{ success: true }` on success, otherwise `{ success: false, error }`.
 *          Never rejects: every thrown error is folded into the result.
 */
private async performSelect(page: Page, ref: string, value: string): Promise<{ success: boolean; error?: string }> {
  try {
    // A snapshot is taken first; presumably this refreshes the ref ids the
    // selector below relies on (confirm against getSnapshot).
    await this.getSnapshot(page);

    const target = page.locator(`aria-ref=${ref}`).first();
    const matches = await target.count();
    if (matches === 0) {
      return { success: false, error: `Element with ref ${ref} not found` };
    }

    // Delegate the actual selection to Playwright.
    await target.selectOption(value);
    return { success: true };
  } catch (error) {
    return { success: false, error: `Select failed: ${error}` };
  }
}
1191
+
1192
/**
 * Perform a mouse click variant at absolute viewport coordinates.
 *
 * @param page    Playwright page to act on.
 * @param control One of `'click'`, `'right_click'` or `'dblclick'`.
 * @param x       Horizontal coordinate; must be finite and within `[0, viewport.width]`.
 * @param y       Vertical coordinate; must be finite and within `[0, viewport.height]`.
 * @returns `{ success: true }` on success, otherwise `{ success: false, error }`.
 *          Never rejects: every thrown error is folded into the result.
 */
private async performMouseControl(page: Page, control: string, x: number, y: number): Promise<{ success: boolean; error?: string }> {
  try {
    const viewport = page.viewportSize();
    if (!viewport) {
      return { success: false, error: 'Viewport size not available from page.' };
    }

    // NaN compares false against everything, so a plain range check would let
    // NaN/Infinity through to the mouse API; require finite numbers explicitly.
    const insideViewport =
      Number.isFinite(x) &&
      Number.isFinite(y) &&
      x >= 0 &&
      y >= 0 &&
      x <= viewport.width &&
      y <= viewport.height;
    if (!insideViewport) {
      return { success: false, error: `Invalid coordinates, outside viewport bounds: (${x}, ${y})` };
    }

    switch (control) {
      case 'click': {
        await page.mouse.click(x, y);
        break;
      }
      case 'right_click': {
        await page.mouse.click(x, y, { button: 'right' });
        break;
      }
      case 'dblclick': {
        await page.mouse.dblclick(x, y);
        break;
      }
      default:
        return { success: false, error: `Invalid control action: ${control}` };
    }

    return { success: true };
  } catch (error) {
    return { success: false, error: `Mouse action failed: ${error}` };
  }
}
1226
+
1227
/**
 * Drag from the centre of one element to the centre of another using raw
 * mouse events (move, down, move, up).
 *
 * @param page    Playwright page to act on.
 * @param fromRef aria-ref id of the element where the drag starts.
 * @param toRef   aria-ref id of the element where the drag ends.
 * @returns `{ success: true }` on success, otherwise `{ success: false, error }`.
 *          Never rejects: every thrown error is folded into the result.
 */
private async performMouseDrag(page: Page, fromRef: string, toRef: string): Promise<{ success: boolean; error?: string }> {
  // Tracks whether we pressed the button without a confirmed release, so the
  // failure path can let go instead of leaving the mouse stuck down.
  let buttonHeld = false;

  try {
    // A snapshot is taken first; presumably this refreshes the ref ids the
    // selectors below rely on (confirm against getSnapshot).
    await this.getSnapshot(page);

    const fromElement = page.locator(`aria-ref=${fromRef}`).first();
    const toElement = page.locator(`aria-ref=${toRef}`).first();

    // Both counts are read before either result is reported.
    const fromExists = (await fromElement.count()) > 0;
    const toExists = (await toElement.count()) > 0;

    if (!fromExists) {
      return { success: false, error: `Source element with ref ${fromRef} not found` };
    }
    if (!toExists) {
      return { success: false, error: `Target element with ref ${toRef} not found` };
    }

    // boundingBox() resolves to null when the element has no box (e.g. not visible).
    const fromBox = await fromElement.boundingBox();
    const toBox = await toElement.boundingBox();

    if (!fromBox) {
      return { success: false, error: `Could not get bounding box for source element with ref ${fromRef}` };
    }
    if (!toBox) {
      return { success: false, error: `Could not get bounding box for target element with ref ${toRef}` };
    }

    const fromX = fromBox.x + fromBox.width / 2;
    const fromY = fromBox.y + fromBox.height / 2;
    const toX = toBox.x + toBox.width / 2;
    const toY = toBox.y + toBox.height / 2;

    // Press at the source centre, travel to the destination centre, release.
    await page.mouse.move(fromX, fromY);
    await page.mouse.down();
    buttonHeld = true;
    await page.mouse.move(toX, toY);
    await page.mouse.up();
    buttonHeld = false;

    return { success: true };
  } catch (error) {
    if (buttonHeld) {
      // Best-effort release; the original failure is what gets reported.
      await page.mouse.up().catch(() => {});
    }
    return { success: false, error: `Mouse drag action failed: ${error}` };
  }
}
1283
+
1284
/**
 * Execute one browser action on the current page and report the outcome.
 *
 * The action is dispatched on `action.type`; after a successful action the
 * method waits for page stability and returns timing data. Failures inside
 * the dispatch are converted into a `success: false` result; only a failure
 * of `getCurrentPage()` (which runs before the try block) rejects.
 *
 * @param action The action to perform.
 * @returns Result with `success`, `message`, `timing`, and, when available,
 *          `newTabId` and `details`.
 */
async executeAction(action: BrowserAction): Promise<ActionResult> {
  const beganAt = Date.now();
  const page = await this.getCurrentPage();

  // Declared outside the try so the failure result can report whatever was
  // measured before the error occurred.
  let elementSearchTime = 0;
  let actionExecutionTime = 0;
  let stabilityWaitTime = 0;

  try {
    const searchBeganAt = Date.now();

    // Each perform* helper takes its own snapshot, so none is fetched here.
    let openedTabId: string | undefined;
    let overrideMessage: string | undefined;
    let details: Record<string, any> | undefined;

    switch (action.type) {
      case 'click': {
        elementSearchTime = Date.now() - searchBeganAt;
        const phaseBeganAt = Date.now();

        const outcome = await this.performClick(page, action.ref);
        if (!outcome.success) {
          throw new Error(`Click failed: ${outcome.error}`);
        }

        // A click may have opened a tab and/or produced a snapshot diff.
        openedTabId = outcome.newTabId;
        if (outcome.diffSnapshot) {
          details = { diffSnapshot: outcome.diffSnapshot };
        }

        actionExecutionTime = Date.now() - phaseBeganAt;
        break;
      }

      case 'type': {
        elementSearchTime = Date.now() - searchBeganAt;
        const phaseBeganAt = Date.now();

        const outcome = await this.performType(page, action.ref, action.text, action.inputs);
        if (!outcome.success) {
          throw new Error(`Type failed: ${outcome.error}`);
        }

        // Per-element results are present when several inputs were typed into.
        if (outcome.details) {
          const perElement = outcome.details;
          const successCount = Object.values(perElement).filter((r: any) => r.success).length;
          const totalCount = Object.keys(perElement).length;
          overrideMessage = `Typed text into ${successCount}/${totalCount} elements`;
          details = perElement;
        }

        // Attach the diff to the existing details object, or to a new one.
        if (outcome.diffSnapshot) {
          details = details || {};
          details.diffSnapshot = outcome.diffSnapshot;
        }

        actionExecutionTime = Date.now() - phaseBeganAt;
        break;
      }

      case 'select': {
        elementSearchTime = Date.now() - searchBeganAt;
        const phaseBeganAt = Date.now();

        const outcome = await this.performSelect(page, action.ref, action.value);
        if (!outcome.success) {
          throw new Error(`Select failed: ${outcome.error}`);
        }

        actionExecutionTime = Date.now() - phaseBeganAt;
        break;
      }

      case 'scroll': {
        elementSearchTime = Date.now() - searchBeganAt;
        const phaseBeganAt = Date.now();

        // Scrolling up means a negative vertical delta.
        const delta = action.direction === 'up' ? -action.amount : action.amount;
        await page.evaluate((amount: number) => {
          window.scrollBy(0, amount);
        }, delta);

        // Refresh the tracked scroll position.
        await this.getCurrentScrollPosition();

        actionExecutionTime = Date.now() - phaseBeganAt;
        break;
      }

      case 'enter': {
        elementSearchTime = Date.now() - searchBeganAt;
        const phaseBeganAt = Date.now();

        const { enterKey } = this.configLoader.getBrowserConfig();
        await page.keyboard.press(enterKey);

        actionExecutionTime = Date.now() - phaseBeganAt;
        break;
      }

      case 'mouse_control': {
        elementSearchTime = Date.now() - searchBeganAt;
        const phaseBeganAt = Date.now();

        const outcome = await this.performMouseControl(page, action.control, action.x, action.y);
        if (!outcome.success) {
          throw new Error(`Action failed: ${outcome.error}`);
        }

        actionExecutionTime = Date.now() - phaseBeganAt;
        break;
      }

      case 'mouse_drag': {
        elementSearchTime = Date.now() - searchBeganAt;
        const phaseBeganAt = Date.now();

        const outcome = await this.performMouseDrag(page, action.from_ref, action.to_ref);
        if (!outcome.success) {
          throw new Error(`Action failed: ${outcome.error}`);
        }

        actionExecutionTime = Date.now() - phaseBeganAt;
        break;
      }

      case 'press_key': {
        elementSearchTime = Date.now() - searchBeganAt;
        const phaseBeganAt = Date.now();

        // Key combinations are expressed as "A+B+C".
        const combo = action.keys.join('+');
        await page.keyboard.press(combo);

        actionExecutionTime = Date.now() - phaseBeganAt;
        break;
      }

      default:
        throw new Error(`Unknown action type: ${(action as any).type}`);
    }

    // Let the page settle before reporting success.
    const settleBeganAt = Date.now();
    const settle = await this.waitForPageStability(page);
    stabilityWaitTime = Date.now() - settleBeganAt;

    const elapsed = Date.now() - beganAt;

    return {
      success: true,
      message: overrideMessage || `Action ${action.type} executed successfully`,
      timing: {
        total_time_ms: elapsed,
        element_search_time_ms: elementSearchTime,
        action_execution_time_ms: actionExecutionTime,
        stability_wait_time_ms: stabilityWaitTime,
        dom_content_loaded_time_ms: settle.domContentLoadedTime,
        network_idle_time_ms: settle.networkIdleTime,
      },
      // Optional fields are only present when they carry a value.
      ...(openedTabId && { newTabId: openedTabId }),
      ...(details && { details }),
    };
  } catch (error) {
    const elapsed = Date.now() - beganAt;
    return {
      success: false,
      message: `Action ${action.type} failed: ${error}`,
      timing: {
        total_time_ms: elapsed,
        element_search_time_ms: elementSearchTime,
        action_execution_time_ms: actionExecutionTime,
        stability_wait_time_ms: stabilityWaitTime,
      },
    };
  }
}
1464
+
1465
/**
 * Wait until the DOM has been quiet for 100 ms, or until the time budget runs out.
 *
 * A MutationObserver is installed in the page and records the time of the
 * last mutation on `window.__domStabilityCheck`; the method then polls for
 * "no mutation within the threshold". Timing out is not an error. The
 * observer and both window properties are removed afterwards in all cases.
 *
 * @param page        Playwright page to observe.
 * @param maxWaitTime Upper bound for the wait in milliseconds (default 500).
 *                    Non-positive or non-finite values are treated as the
 *                    smallest possible budget rather than "wait forever".
 */
private async waitForDOMStability(page: Page, maxWaitTime: number = 500): Promise<void> {
  const stabilityThreshold = 100; // Consider stable if no changes for 100ms

  // Playwright interprets `timeout: 0` as "no timeout", so a zero or negative
  // budget must not be clamped to 0; otherwise a busy page would block forever.
  const waitBudget = Number.isFinite(maxWaitTime) && maxWaitTime > 0 ? maxWaitTime : 1;

  try {
    // Install the observer inside the page.
    await page.evaluate(() => {
      const win = window as any;
      win.__domStabilityCheck = { changeCount: 0, lastChange: Date.now() };

      const observer = new MutationObserver(() => {
        win.__domStabilityCheck.changeCount++;
        win.__domStabilityCheck.lastChange = Date.now();
      });

      // document.body can be null while the document is still being parsed.
      const root = document.body || document.documentElement;
      observer.observe(root, {
        childList: true,
        subtree: true,
        attributes: true,
        characterData: true
      });

      win.__domStabilityObserver = observer;
    });

    // Resolve once no mutation happened within the threshold; a timeout is
    // swallowed on purpose because the wait is best-effort.
    await page.waitForFunction(
      (threshold) => {
        const check = (window as any).__domStabilityCheck;
        return check && (Date.now() - check.lastChange) > threshold;
      },
      stabilityThreshold,
      { timeout: waitBudget }
    ).catch(() => {});
  } finally {
    // Always detach the observer and drop the bookkeeping globals.
    await page.evaluate(() => {
      const observer = (window as any).__domStabilityObserver;
      if (observer) observer.disconnect();
      delete (window as any).__domStabilityObserver;
      delete (window as any).__domStabilityCheck;
    }).catch(() => {});
  }
}
1513
+
1514
/**
 * Wait for the two configured load states in sequence and report how long
 * each wait took.
 *
 * The wait is best-effort: if either wait (or the config lookup) throws, the
 * error is swallowed and any phase that did not complete is reported as 0.
 *
 * @param page Playwright page to wait on.
 * @returns Milliseconds spent in each phase.
 */
private async waitForPageStability(page: Page): Promise<{ domContentLoadedTime: number; networkIdleTime: number }> {
  const spent = { domContentLoadedTime: 0, networkIdleTime: 0 };

  try {
    const firstPhaseBeganAt = Date.now();
    const cfg = this.configLoader.getBrowserConfig();
    await page.waitForLoadState(cfg.domContentLoadedState as any, { timeout: cfg.pageStabilityTimeout });
    spent.domContentLoadedTime = Date.now() - firstPhaseBeganAt;

    const secondPhaseBeganAt = Date.now();
    await page.waitForLoadState(cfg.networkIdleState as any, { timeout: cfg.networkIdleTimeout });
    spent.networkIdleTime = Date.now() - secondPhaseBeganAt;
  } catch (_ignored) {
    // A failed or timed-out wait is not treated as an error.
  }

  return spent;
}
1533
+
1534
/**
 * Navigate to `url`, reusing the current tab when it is blank (or when this
 * is the first navigation) and opening a new tab otherwise.
 *
 * @param url Address to load.
 * @returns Result with `success`, `message` and `timing`; `newTabId` is set
 *          when the URL was opened in a different tab. Never rejects: every
 *          failure is reported as `success: false`.
 */
async visitPage(url: string): Promise<ActionResult & { newTabId?: string }> {
  const startTime = Date.now();

  try {
    let currentPage: Page;
    let currentUrl: string;

    try {
      currentPage = await this.getCurrentPage();
      currentUrl = currentPage.url();
    } catch (error: any) {
      // Without a usable current page there is nothing to decide on; report
      // it through the outer handler as a navigation failure.
      console.log('[visitPage] Failed to get current page:', error);
      throw new Error(`No active page available: ${error?.message || error}`);
    }

    // One config lookup serves the whole method.
    const browserConfig = this.configLoader.getBrowserConfig();
    const gotoOptions = {
      timeout: browserConfig.navigationTimeout,
      waitUntil: browserConfig.domContentLoadedState as any
    };

    const isBlankPage = this.isBlankPageUrl(currentUrl) || currentUrl === browserConfig.defaultStartUrl;
    const shouldUseCurrentTab = isBlankPage || !this.hasNavigatedBefore;

    if (shouldUseCurrentTab) {
      const navigationStart = Date.now();
      await currentPage.goto(url, gotoOptions);

      // A fresh document starts at the top-left corner.
      this.scrollPosition = { x: 0, y: 0 };
      this.hasNavigatedBefore = true;

      const navigationTime = Date.now() - navigationStart;
      const stabilityResult = await this.waitForPageStability(currentPage);
      const totalTime = Date.now() - startTime;

      return {
        success: true,
        message: `Navigated to ${url}`,
        timing: {
          total_time_ms: totalTime,
          navigation_time_ms: navigationTime,
          dom_content_loaded_time_ms: stabilityResult.domContentLoadedTime,
          network_idle_time_ms: stabilityResult.networkIdleTime,
        },
      };
    }

    // The current page has content: load the URL in another tab.
    const context = this.context;
    if (!context) {
      throw new Error('Browser context not initialized');
    }

    const navigationStart = Date.now();

    let newPage: Page | null = null;
    if (browserConfig.cdpUrl) {
      // CDP mode: prefer an existing blank tab that we are not tracking yet.
      const trackedPages = new Set(this.pages.values());
      newPage = context.pages().find(
        candidate => !trackedPages.has(candidate) && this.isBlankPageUrl(candidate.url())
      ) || null;
      if (!newPage) {
        console.log('[CDP] No available blank tabs, creating new page');
      }
    }
    if (!newPage) {
      newPage = await context.newPage();
    }

    const newTabId = this.generateTabId();
    this.registerNewPage(newTabId, newPage);

    // Both default timeouts of the tab follow the navigation timeout.
    newPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
    newPage.setDefaultTimeout(browserConfig.navigationTimeout);

    await newPage.goto(url, gotoOptions);

    // The new tab becomes the current one only after navigation succeeded.
    this.currentTabId = newTabId;
    await newPage.bringToFront();

    this.scrollPosition = { x: 0, y: 0 };
    this.hasNavigatedBefore = true;

    const navigationTime = Date.now() - navigationStart;
    const stabilityResult = await this.waitForPageStability(newPage);
    const totalTime = Date.now() - startTime;

    return {
      success: true,
      message: `Opened ${url} in new tab`,
      newTabId: newTabId,
      timing: {
        total_time_ms: totalTime,
        navigation_time_ms: navigationTime,
        dom_content_loaded_time_ms: stabilityResult.domContentLoadedTime,
        network_idle_time_ms: stabilityResult.networkIdleTime,
      },
    };
  } catch (error) {
    const totalTime = Date.now() - startTime;
    return {
      success: false,
      message: `Navigation to ${url} failed: ${error}`,
      timing: {
        total_time_ms: totalTime,
        navigation_time_ms: 0,
        dom_content_loaded_time_ms: 0,
        network_idle_time_ms: 0,
      },
    };
  }
}
1681
+
1682
/**
 * Make `tabId` the current tab.
 *
 * The internal pointer is updated first; focusing the page is best-effort
 * and never causes a failure. In non-headless mode the tab is also brought
 * to the front, but in the background and after the configured delay.
 *
 * @param tabId Id of a tracked tab.
 * @returns `true` when the tab is now current; `false` when the id is
 *          unknown, the page is already closed (its entry is then removed),
 *          or an unexpected error occurred.
 */
async switchToTab(tabId: string): Promise<boolean> {
  const target = this.pages.get(tabId);
  if (target === undefined) {
    return false;
  }

  if (target.isClosed()) {
    // Drop the stale entry.
    this.pages.delete(tabId);
    return false;
  }

  // Deferred raise: waits for the configured delay, then brings the tab to
  // the front, ignoring a failure of that last step.
  const raiseAfterDelay = async () => {
    const browserConfig = this.configLoader.getBrowserConfig();
    await new Promise(resolve => setTimeout(resolve, browserConfig.navigationDelay));
    try {
      await target.bringToFront();
    } catch (e) {
      console.debug(`bringToFront failed for ${tabId}, but tab is switched internally`);
    }
  };

  try {
    console.log(`Switching to tab ${tabId}`);

    // State first, so the switch holds even if focusing fails below.
    this.currentTabId = tabId;

    try {
      // In-page focus: focus the window and emit a synthetic focus event.
      await target.evaluate(() => {
        window.focus();
        window.dispatchEvent(new Event('focus'));
      }).catch(() => {});

      const { headless } = this.configLoader.getBrowserConfig();
      if (!headless) {
        // Deliberately not awaited so the caller is not blocked.
        Promise.resolve().then(raiseAfterDelay);
      }
    } catch (error) {
      console.warn(`Tab focus warning for ${tabId}:`, error);
    }

    console.log(`Successfully switched to tab ${tabId}`);
    return true;
  } catch (error) {
    console.error(`Error switching to tab ${tabId}:`, error);
    return false;
  }
}
1739
+
1740
/**
 * Close a tracked tab and forget it.
 *
 * When the closed tab was the current one, the first remaining tracked tab
 * becomes current, or `null` when none is left.
 *
 * @param tabId Id of the tab to close.
 * @returns `false` when the id is not tracked, `true` otherwise.
 */
async closeTab(tabId: string): Promise<boolean> {
  const doomed = this.pages.get(tabId);
  if (doomed === undefined) {
    return false;
  }

  // The page may already have been closed elsewhere.
  if (!doomed.isClosed()) {
    await doomed.close();
  }
  this.pages.delete(tabId);

  if (this.currentTabId === tabId) {
    const survivors = Array.from(this.pages.keys());
    this.currentTabId = survivors.length > 0 ? survivors[0] : null;
  }

  return true;
}
1764
+
1765
/**
 * Run a sequence of keyboard operations on the current page.
 *
 * Supported operation types:
 *  - `press`: presses `keys` joined with `+` (skipped when `keys` is missing)
 *  - `type`:  types `text` with a per-character delay capped at 1000 ms
 *             (skipped when `text` is empty or missing)
 *  - `wait`:  pauses for `delay` ms, capped at 10000 ms
 * Operations of any other type are ignored.
 *
 * @param operations        At most 100 operations, executed in order.
 * @param skipStabilityWait When true, the load-state wait is skipped and
 *                          only a fixed 50 ms pause follows the batch.
 * @returns Object with `success`, `message` and `timing`. Failures inside
 *          the batch are reported as `success: false`; only a failure of
 *          `getCurrentPage()` rejects.
 */
async batchKeyboardInput(operations: Array<{type: string, keys?: string[], text?: string, delay?: number}>, skipStabilityWait: boolean = false): Promise<any> {
  const startTime = Date.now();
  const page = await this.getCurrentPage();

  // Coerce a caller-supplied delay to a finite number in [0, ceiling];
  // anything unusable (undefined, NaN, Infinity, negative) becomes 0.
  const clampDelay = (raw: unknown, ceiling: number): number => {
    const value = Number(raw);
    if (!isFinite(value) || value < 0) {
      return 0;
    }
    return Math.min(value, ceiling);
  };

  try {
    const maxOperations = 100; // Prevent excessive number of operations per batch
    if (!Array.isArray(operations)) {
      // Reported separately so a malformed payload is not mistaken for an oversized one.
      throw new Error('Operations must be provided as an array');
    }
    if (operations.length > maxOperations) {
      throw new Error(`Too many operations in batch (max ${maxOperations} allowed)`);
    }

    const executionStart = Date.now();

    for (const op of operations) {
      switch (op.type) {
        case 'press':
          if (op.keys) {
            await page.keyboard.press(op.keys.join('+'));
          }
          break;
        case 'type':
          if (op.text) {
            // Cap the per-character delay to prevent resource exhaustion.
            const maxTypeDelay = 1000; // 1 second per character max
            await page.keyboard.type(op.text, { delay: clampDelay(op.delay, maxTypeDelay) });
          }
          break;
        case 'wait':
          {
            const MAX_WAIT_DELAY = 10000; // 10 seconds maximum
            const safeDelay = clampDelay(op.delay, MAX_WAIT_DELAY);
            // lgtm[js/resource-exhaustion]
            // Safe: delay is clamped to MAX_WAIT_DELAY (10 seconds)
            await new Promise(resolve => setTimeout(resolve, safeDelay));
          }
          break;
      }
    }

    const executionTime = Date.now() - executionStart;
    let stabilityTime = 0;

    if (!skipStabilityWait) {
      const stabilityStart = Date.now();

      try {
        const browserConfig = this.configLoader.getBrowserConfig();
        await page.waitForLoadState(browserConfig.domContentLoadedState as any, { timeout: browserConfig.pageStabilityTimeout });
      } catch (error) {
        // Best-effort: a load-state timeout does not fail the batch.
      }

      await new Promise(resolve => setTimeout(resolve, 50));
      stabilityTime = Date.now() - stabilityStart;
    } else {
      await new Promise(resolve => setTimeout(resolve, 50));
      stabilityTime = 50; // nominal value; the pause is not measured here
    }

    const totalTime = Date.now() - startTime;

    return {
      success: true,
      message: `Batch keyboard input completed (${operations.length} operations)`,
      timing: {
        total_time_ms: totalTime,
        execution_time_ms: executionTime,
        stability_wait_time_ms: stabilityTime,
        operations_count: operations.length,
        skipped_stability: skipStabilityWait,
      },
    };
  } catch (error) {
    const totalTime = Date.now() - startTime;
    return {
      success: false,
      message: `Batch keyboard input failed: ${error}`,
      timing: {
        total_time_ms: totalTime,
      },
    };
  }
}
1859
+
1860
/**
 * Describe every tracked tab that is still open.
 *
 * Closed pages are skipped, and so is any page whose title or URL cannot be
 * read.
 *
 * @returns One entry per readable tab, in the iteration order of the page map.
 */
async getTabInfo(): Promise<TabInfo[]> {
  const described: TabInfo[] = [];

  for (const [id, tab] of this.pages) {
    if (tab.isClosed()) {
      continue;
    }

    try {
      const title = await tab.title();
      described.push({
        tab_id: id,
        title,
        url: tab.url(),
        is_current: id === this.currentTabId,
      });
    } catch (_unreadable) {
      // Leave out tabs that cannot be inspected.
    }
  }

  return described;
}
1883
+
1884
/**
 * Capture a screenshot of the current page.
 *
 * The timeout and the full-page flag come from the browser configuration.
 * The reported duration starts before the current page is looked up, so it
 * includes that lookup. Errors are not caught here.
 *
 * @returns The image buffer together with the elapsed time in milliseconds.
 */
async takeScreenshot(): Promise<{ buffer: Buffer; timing: { screenshot_time_ms: number } }> {
  const beganAt = Date.now();
  const page = await this.getCurrentPage();

  const { screenshotTimeout, fullPageScreenshot } = this.configLoader.getBrowserConfig();
  const buffer = await page.screenshot({
    timeout: screenshotTimeout,
    fullPage: fullPageScreenshot
  });

  return {
    buffer,
    timing: {
      screenshot_time_ms: Date.now() - beganAt,
    },
  };
}
1903
+
1904
/**
 * Tear down the session: close every tracked tab, then the context (only if
 * this instance owns it), then the browser handle.
 *
 * Tab closing is best-effort so that one failing tab cannot leave the
 * context or browser open. The browser handle is always released and
 * cleared, even when closing the context fails; that failure is still
 * propagated to the caller afterwards.
 */
async close(): Promise<void> {
  const browserConfig = this.configLoader.getBrowserConfig();
  const usingCdp = Boolean(browserConfig.cdpUrl);

  for (const page of this.pages.values()) {
    if (!page.isClosed()) {
      // A tab that refuses to close must not abort the rest of the teardown.
      await page.close().catch(() => {});
    }
  }

  this.pages.clear();
  this.currentTabId = null;

  try {
    // Local launch: our own context is closed before the browser.
    if (!usingCdp && this.context && this.contextOwnedByUs) {
      await this.context.close();
      this.context = null;
      this.contextOwnedByUs = false;
    }
  } finally {
    if (this.browser) {
      try {
        if (usingCdp && this.context && this.contextOwnedByUs) {
          // CDP mode: tear down only the context we created, ignoring errors.
          await this.context.close().catch(() => {});
          this.context = null;
          this.contextOwnedByUs = false;
        }
        // For a CDP connection this is meant as a disconnect; for a local
        // launch it closes the browser itself.
        await this.browser.close();
      } finally {
        // Never keep a handle to a browser we attempted to close.
        this.browser = null;
      }
    }
  }
}
1940
+ }