camel-ai 0.2.59__py3-none-any.whl → 0.2.82__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (506) hide show
  1. camel/__init__.py +3 -3
  2. camel/agents/__init__.py +2 -2
  3. camel/agents/_types.py +9 -4
  4. camel/agents/_utils.py +40 -2
  5. camel/agents/base.py +2 -2
  6. camel/agents/chat_agent.py +5012 -902
  7. camel/agents/critic_agent.py +2 -2
  8. camel/agents/deductive_reasoner_agent.py +56 -56
  9. camel/agents/embodied_agent.py +2 -2
  10. camel/agents/knowledge_graph_agent.py +20 -20
  11. camel/agents/mcp_agent.py +39 -36
  12. camel/agents/multi_hop_generator_agent.py +3 -3
  13. camel/agents/programmed_agent_instruction.py +2 -2
  14. camel/agents/repo_agent.py +4 -3
  15. camel/agents/role_assignment_agent.py +2 -2
  16. camel/agents/search_agent.py +2 -2
  17. camel/agents/task_agent.py +2 -2
  18. camel/agents/tool_agents/__init__.py +2 -2
  19. camel/agents/tool_agents/base.py +2 -2
  20. camel/agents/tool_agents/hugging_face_tool_agent.py +3 -3
  21. camel/benchmarks/__init__.py +2 -2
  22. camel/benchmarks/apibank.py +5 -5
  23. camel/benchmarks/apibench.py +2 -2
  24. camel/benchmarks/base.py +2 -2
  25. camel/benchmarks/browsecomp.py +44 -33
  26. camel/benchmarks/gaia.py +17 -13
  27. camel/benchmarks/mock_website/README.md +94 -0
  28. camel/benchmarks/mock_website/mock_web.py +299 -0
  29. camel/benchmarks/mock_website/requirements.txt +3 -0
  30. camel/benchmarks/mock_website/shopping_mall/app.py +465 -0
  31. camel/benchmarks/mock_website/task.json +104 -0
  32. camel/benchmarks/nexus.py +3 -3
  33. camel/benchmarks/ragbench.py +2 -2
  34. camel/bots/__init__.py +2 -2
  35. camel/bots/discord/__init__.py +2 -2
  36. camel/bots/discord/discord_app.py +2 -2
  37. camel/bots/discord/discord_installation.py +2 -2
  38. camel/bots/discord/discord_store.py +3 -3
  39. camel/bots/slack/__init__.py +2 -2
  40. camel/bots/slack/models.py +4 -4
  41. camel/bots/slack/slack_app.py +2 -2
  42. camel/bots/telegram_bot.py +2 -2
  43. camel/configs/__init__.py +26 -2
  44. camel/configs/aihubmix_config.py +90 -0
  45. camel/configs/aiml_config.py +2 -2
  46. camel/configs/amd_config.py +70 -0
  47. camel/configs/anthropic_config.py +8 -7
  48. camel/configs/base_config.py +2 -2
  49. camel/configs/bedrock_config.py +5 -3
  50. camel/configs/cerebras_config.py +98 -0
  51. camel/configs/cohere_config.py +3 -3
  52. camel/configs/cometapi_config.py +106 -0
  53. camel/configs/crynux_config.py +94 -0
  54. camel/configs/deepseek_config.py +9 -8
  55. camel/configs/gemini_config.py +6 -4
  56. camel/configs/groq_config.py +6 -4
  57. camel/configs/internlm_config.py +6 -4
  58. camel/configs/litellm_config.py +2 -2
  59. camel/configs/lmstudio_config.py +6 -4
  60. camel/configs/minimax_config.py +95 -0
  61. camel/configs/mistral_config.py +3 -3
  62. camel/configs/modelscope_config.py +5 -3
  63. camel/configs/moonshot_config.py +2 -2
  64. camel/configs/nebius_config.py +105 -0
  65. camel/configs/netmind_config.py +2 -2
  66. camel/configs/novita_config.py +2 -2
  67. camel/configs/nvidia_config.py +2 -2
  68. camel/configs/ollama_config.py +2 -2
  69. camel/configs/openai_config.py +8 -3
  70. camel/configs/openrouter_config.py +6 -4
  71. camel/configs/ppio_config.py +2 -2
  72. camel/configs/qianfan_config.py +85 -0
  73. camel/configs/qwen_config.py +2 -2
  74. camel/configs/reka_config.py +3 -3
  75. camel/configs/samba_config.py +8 -6
  76. camel/configs/sglang_config.py +2 -2
  77. camel/configs/siliconflow_config.py +2 -2
  78. camel/configs/togetherai_config.py +2 -2
  79. camel/configs/vllm_config.py +4 -2
  80. camel/configs/watsonx_config.py +2 -2
  81. camel/configs/yi_config.py +6 -4
  82. camel/configs/zhipuai_config.py +6 -4
  83. camel/{data_collector → data_collectors}/__init__.py +2 -2
  84. camel/{data_collector → data_collectors}/alpaca_collector.py +19 -10
  85. camel/{data_collector → data_collectors}/base.py +2 -2
  86. camel/{data_collector → data_collectors}/sharegpt_collector.py +3 -3
  87. camel/datagen/__init__.py +2 -2
  88. camel/datagen/cot_datagen.py +32 -37
  89. camel/datagen/evol_instruct/__init__.py +2 -2
  90. camel/datagen/evol_instruct/evol_instruct.py +2 -2
  91. camel/datagen/evol_instruct/scorer.py +24 -25
  92. camel/datagen/evol_instruct/templates.py +48 -48
  93. camel/datagen/self_improving_cot.py +5 -5
  94. camel/datagen/self_instruct/__init__.py +2 -2
  95. camel/datagen/self_instruct/filter/__init__.py +2 -2
  96. camel/datagen/self_instruct/filter/filter_function.py +2 -2
  97. camel/datagen/self_instruct/filter/filter_registry.py +2 -2
  98. camel/datagen/self_instruct/filter/instruction_filter.py +2 -2
  99. camel/datagen/self_instruct/self_instruct.py +2 -2
  100. camel/datagen/self_instruct/templates.py +47 -47
  101. camel/datagen/source2synth/__init__.py +2 -2
  102. camel/datagen/source2synth/data_processor.py +2 -2
  103. camel/datagen/source2synth/models.py +2 -2
  104. camel/datagen/source2synth/user_data_processor_config.py +2 -2
  105. camel/datahubs/__init__.py +2 -2
  106. camel/datahubs/base.py +2 -2
  107. camel/datahubs/huggingface.py +2 -2
  108. camel/datahubs/models.py +2 -2
  109. camel/datasets/__init__.py +2 -2
  110. camel/datasets/base_generator.py +41 -12
  111. camel/datasets/few_shot_generator.py +18 -18
  112. camel/datasets/models.py +3 -3
  113. camel/datasets/self_instruct_generator.py +2 -2
  114. camel/datasets/static_dataset.py +152 -2
  115. camel/embeddings/__init__.py +2 -2
  116. camel/embeddings/azure_embedding.py +2 -2
  117. camel/embeddings/base.py +2 -2
  118. camel/embeddings/gemini_embedding.py +2 -2
  119. camel/embeddings/jina_embedding.py +10 -3
  120. camel/embeddings/mistral_embedding.py +2 -2
  121. camel/embeddings/openai_compatible_embedding.py +2 -2
  122. camel/embeddings/openai_embedding.py +2 -2
  123. camel/embeddings/sentence_transformers_embeddings.py +4 -4
  124. camel/embeddings/together_embedding.py +2 -2
  125. camel/embeddings/vlm_embedding.py +11 -4
  126. camel/environments/__init__.py +14 -2
  127. camel/environments/models.py +2 -2
  128. camel/environments/multi_step.py +2 -2
  129. camel/environments/rlcards_env.py +860 -0
  130. camel/environments/single_step.py +30 -5
  131. camel/environments/tic_tac_toe.py +3 -3
  132. camel/extractors/__init__.py +2 -2
  133. camel/extractors/base.py +2 -2
  134. camel/extractors/python_strategies.py +2 -2
  135. camel/generators.py +2 -2
  136. camel/human.py +2 -2
  137. camel/interpreters/__init__.py +4 -2
  138. camel/interpreters/base.py +16 -3
  139. camel/interpreters/docker/Dockerfile +53 -7
  140. camel/interpreters/docker_interpreter.py +70 -11
  141. camel/interpreters/e2b_interpreter.py +59 -11
  142. camel/interpreters/internal_python_interpreter.py +81 -4
  143. camel/interpreters/interpreter_error.py +2 -2
  144. camel/interpreters/ipython_interpreter.py +23 -5
  145. camel/interpreters/microsandbox_interpreter.py +395 -0
  146. camel/interpreters/subprocess_interpreter.py +36 -4
  147. camel/loaders/__init__.py +17 -5
  148. camel/loaders/apify_reader.py +2 -2
  149. camel/loaders/base_io.py +2 -2
  150. camel/loaders/base_loader.py +85 -0
  151. camel/loaders/chunkr_reader.py +128 -93
  152. camel/loaders/crawl4ai_reader.py +2 -2
  153. camel/loaders/firecrawl_reader.py +6 -6
  154. camel/loaders/jina_url_reader.py +2 -2
  155. camel/loaders/markitdown.py +2 -2
  156. camel/loaders/mineru_extractor.py +2 -2
  157. camel/loaders/mistral_reader.py +148 -0
  158. camel/loaders/scrapegraph_reader.py +2 -2
  159. camel/loaders/unstructured_io.py +2 -2
  160. camel/logger.py +5 -5
  161. camel/memories/__init__.py +2 -2
  162. camel/memories/agent_memories.py +86 -3
  163. camel/memories/base.py +36 -2
  164. camel/memories/blocks/__init__.py +2 -2
  165. camel/memories/blocks/chat_history_block.py +126 -9
  166. camel/memories/blocks/vectordb_block.py +10 -3
  167. camel/memories/context_creators/__init__.py +2 -2
  168. camel/memories/context_creators/score_based.py +31 -239
  169. camel/memories/records.py +98 -13
  170. camel/messages/__init__.py +2 -2
  171. camel/messages/base.py +193 -46
  172. camel/messages/conversion/__init__.py +2 -2
  173. camel/messages/conversion/alpaca.py +2 -2
  174. camel/messages/conversion/conversation_models.py +2 -2
  175. camel/messages/conversion/sharegpt/__init__.py +2 -2
  176. camel/messages/conversion/sharegpt/function_call_formatter.py +2 -2
  177. camel/messages/conversion/sharegpt/hermes/__init__.py +2 -2
  178. camel/messages/conversion/sharegpt/hermes/hermes_function_formatter.py +2 -2
  179. camel/messages/func_message.py +54 -17
  180. camel/models/__init__.py +18 -2
  181. camel/models/_utils.py +3 -3
  182. camel/models/aihubmix_model.py +83 -0
  183. camel/models/aiml_model.py +11 -18
  184. camel/models/amd_model.py +101 -0
  185. camel/models/anthropic_model.py +127 -20
  186. camel/models/aws_bedrock_model.py +12 -35
  187. camel/models/azure_openai_model.py +263 -63
  188. camel/models/base_audio_model.py +5 -3
  189. camel/models/base_model.py +195 -26
  190. camel/models/cerebras_model.py +83 -0
  191. camel/models/cohere_model.py +81 -21
  192. camel/models/cometapi_model.py +83 -0
  193. camel/models/crynux_model.py +87 -0
  194. camel/models/deepseek_model.py +61 -59
  195. camel/models/fish_audio_model.py +8 -2
  196. camel/models/gemini_model.py +439 -30
  197. camel/models/groq_model.py +11 -19
  198. camel/models/internlm_model.py +11 -18
  199. camel/models/litellm_model.py +94 -34
  200. camel/models/lmstudio_model.py +17 -20
  201. camel/models/minimax_model.py +83 -0
  202. camel/models/mistral_model.py +84 -19
  203. camel/models/model_factory.py +49 -6
  204. camel/models/model_manager.py +33 -11
  205. camel/models/modelscope_model.py +13 -193
  206. camel/models/moonshot_model.py +195 -21
  207. camel/models/nebius_model.py +83 -0
  208. camel/models/nemotron_model.py +19 -9
  209. camel/models/netmind_model.py +11 -18
  210. camel/models/novita_model.py +11 -18
  211. camel/models/nvidia_model.py +11 -18
  212. camel/models/ollama_model.py +14 -21
  213. camel/models/openai_audio_models.py +2 -2
  214. camel/models/openai_compatible_model.py +234 -27
  215. camel/models/openai_model.py +255 -39
  216. camel/models/openrouter_model.py +11 -19
  217. camel/models/ppio_model.py +11 -18
  218. camel/models/qianfan_model.py +89 -0
  219. camel/models/qwen_model.py +13 -193
  220. camel/models/reka_model.py +90 -21
  221. camel/models/reward/__init__.py +2 -2
  222. camel/models/reward/base_reward_model.py +2 -2
  223. camel/models/reward/evaluator.py +2 -2
  224. camel/models/reward/nemotron_model.py +2 -2
  225. camel/models/reward/skywork_model.py +2 -2
  226. camel/models/samba_model.py +117 -49
  227. camel/models/sglang_model.py +162 -42
  228. camel/models/siliconflow_model.py +12 -35
  229. camel/models/stub_model.py +10 -7
  230. camel/models/togetherai_model.py +11 -18
  231. camel/models/vllm_model.py +10 -18
  232. camel/models/volcano_model.py +16 -20
  233. camel/models/watsonx_model.py +69 -19
  234. camel/models/yi_model.py +11 -18
  235. camel/models/zhipuai_model.py +70 -18
  236. camel/parsers/__init__.py +18 -0
  237. camel/parsers/mcp_tool_call_parser.py +176 -0
  238. camel/personas/__init__.py +2 -2
  239. camel/personas/persona.py +2 -2
  240. camel/personas/persona_hub.py +2 -2
  241. camel/prompts/__init__.py +2 -2
  242. camel/prompts/ai_society.py +2 -2
  243. camel/prompts/base.py +2 -2
  244. camel/prompts/code.py +2 -2
  245. camel/prompts/evaluation.py +2 -2
  246. camel/prompts/generate_text_embedding_data.py +2 -2
  247. camel/prompts/image_craft.py +2 -2
  248. camel/prompts/misalignment.py +2 -2
  249. camel/prompts/multi_condition_image_craft.py +2 -2
  250. camel/prompts/object_recognition.py +2 -2
  251. camel/prompts/persona_hub.py +3 -3
  252. camel/prompts/prompt_templates.py +2 -2
  253. camel/prompts/role_description_prompt_template.py +2 -2
  254. camel/prompts/solution_extraction.py +8 -8
  255. camel/prompts/task_prompt_template.py +2 -2
  256. camel/prompts/translation.py +2 -2
  257. camel/prompts/video_description_prompt.py +3 -3
  258. camel/responses/__init__.py +2 -2
  259. camel/responses/agent_responses.py +2 -2
  260. camel/retrievers/__init__.py +2 -2
  261. camel/retrievers/auto_retriever.py +23 -3
  262. camel/retrievers/base.py +2 -2
  263. camel/retrievers/bm25_retriever.py +3 -4
  264. camel/retrievers/cohere_rerank_retriever.py +2 -2
  265. camel/retrievers/hybrid_retrival.py +4 -4
  266. camel/retrievers/vector_retriever.py +2 -2
  267. camel/runtimes/Dockerfile.multi-toolkit +90 -0
  268. camel/{runtime → runtimes}/__init__.py +2 -2
  269. camel/runtimes/api.py +153 -0
  270. camel/{runtime → runtimes}/base.py +2 -2
  271. camel/{runtime → runtimes}/configs.py +13 -13
  272. camel/{runtime → runtimes}/daytona_runtime.py +18 -19
  273. camel/{runtime → runtimes}/docker_runtime.py +13 -13
  274. camel/{runtime → runtimes}/llm_guard_runtime.py +28 -28
  275. camel/{runtime → runtimes}/remote_http_runtime.py +12 -12
  276. camel/{runtime → runtimes}/ubuntu_docker_runtime.py +3 -3
  277. camel/{runtime → runtimes}/utils/__init__.py +2 -2
  278. camel/{runtime → runtimes}/utils/function_risk_toolkit.py +2 -2
  279. camel/{runtime → runtimes}/utils/ignore_risk_toolkit.py +2 -2
  280. camel/schemas/__init__.py +2 -2
  281. camel/schemas/base.py +2 -2
  282. camel/schemas/openai_converter.py +3 -3
  283. camel/schemas/outlines_converter.py +2 -2
  284. camel/services/agent_openapi_server.py +380 -0
  285. camel/societies/__init__.py +4 -2
  286. camel/societies/babyagi_playing.py +2 -2
  287. camel/societies/role_playing.py +201 -80
  288. camel/societies/workforce/__init__.py +10 -3
  289. camel/societies/workforce/base.py +9 -5
  290. camel/societies/workforce/events.py +143 -0
  291. camel/societies/workforce/prompts.py +258 -33
  292. camel/societies/workforce/role_playing_worker.py +95 -30
  293. camel/societies/workforce/single_agent_worker.py +659 -30
  294. camel/societies/workforce/structured_output_handler.py +512 -0
  295. camel/societies/workforce/task_channel.py +182 -38
  296. camel/societies/workforce/utils.py +784 -18
  297. camel/societies/workforce/worker.py +96 -28
  298. camel/societies/workforce/workflow_memory_manager.py +1746 -0
  299. camel/societies/workforce/workforce.py +5730 -366
  300. camel/societies/workforce/workforce_callback.py +103 -0
  301. camel/societies/workforce/workforce_logger.py +647 -0
  302. camel/societies/workforce/workforce_metrics.py +33 -0
  303. camel/storages/__init__.py +10 -2
  304. camel/storages/graph_storages/__init__.py +2 -2
  305. camel/storages/graph_storages/base.py +2 -2
  306. camel/storages/graph_storages/graph_element.py +2 -2
  307. camel/storages/graph_storages/nebula_graph.py +4 -4
  308. camel/storages/graph_storages/neo4j_graph.py +7 -7
  309. camel/storages/key_value_storages/__init__.py +2 -2
  310. camel/storages/key_value_storages/base.py +2 -2
  311. camel/storages/key_value_storages/in_memory.py +2 -2
  312. camel/storages/key_value_storages/json.py +17 -4
  313. camel/storages/key_value_storages/mem0_cloud.py +50 -49
  314. camel/storages/key_value_storages/redis.py +2 -2
  315. camel/storages/object_storages/__init__.py +2 -2
  316. camel/storages/object_storages/amazon_s3.py +2 -2
  317. camel/storages/object_storages/azure_blob.py +2 -2
  318. camel/storages/object_storages/base.py +2 -2
  319. camel/storages/object_storages/google_cloud.py +3 -3
  320. camel/storages/vectordb_storages/__init__.py +12 -2
  321. camel/storages/vectordb_storages/base.py +2 -2
  322. camel/storages/vectordb_storages/chroma.py +731 -0
  323. camel/storages/vectordb_storages/faiss.py +712 -0
  324. camel/storages/vectordb_storages/milvus.py +2 -2
  325. camel/storages/vectordb_storages/oceanbase.py +16 -17
  326. camel/storages/vectordb_storages/pgvector.py +349 -0
  327. camel/storages/vectordb_storages/qdrant.py +6 -6
  328. camel/storages/vectordb_storages/surreal.py +372 -0
  329. camel/storages/vectordb_storages/tidb.py +11 -8
  330. camel/storages/vectordb_storages/weaviate.py +714 -0
  331. camel/tasks/__init__.py +2 -2
  332. camel/tasks/task.py +366 -27
  333. camel/tasks/task_prompt.py +3 -3
  334. camel/terminators/__init__.py +2 -2
  335. camel/terminators/base.py +2 -2
  336. camel/terminators/response_terminator.py +2 -2
  337. camel/terminators/token_limit_terminator.py +2 -2
  338. camel/toolkits/__init__.py +58 -10
  339. camel/toolkits/aci_toolkit.py +66 -21
  340. camel/toolkits/arxiv_toolkit.py +8 -8
  341. camel/toolkits/ask_news_toolkit.py +2 -2
  342. camel/toolkits/async_browser_toolkit.py +174 -575
  343. camel/toolkits/audio_analysis_toolkit.py +3 -3
  344. camel/toolkits/base.py +65 -7
  345. camel/toolkits/bohrium_toolkit.py +318 -0
  346. camel/toolkits/browser_toolkit.py +306 -566
  347. camel/toolkits/browser_toolkit_commons.py +568 -0
  348. camel/toolkits/code_execution.py +67 -11
  349. camel/toolkits/context_summarizer_toolkit.py +684 -0
  350. camel/toolkits/craw4ai_toolkit.py +93 -0
  351. camel/toolkits/dappier_toolkit.py +12 -8
  352. camel/toolkits/data_commons_toolkit.py +2 -2
  353. camel/toolkits/dingtalk.py +1135 -0
  354. camel/toolkits/earth_science_toolkit.py +5367 -0
  355. camel/toolkits/edgeone_pages_mcp_toolkit.py +49 -0
  356. camel/toolkits/excel_toolkit.py +910 -70
  357. camel/toolkits/file_toolkit.py +1402 -0
  358. camel/toolkits/function_tool.py +128 -20
  359. camel/toolkits/github_toolkit.py +148 -43
  360. camel/toolkits/gmail_toolkit.py +1839 -0
  361. camel/toolkits/google_calendar_toolkit.py +40 -6
  362. camel/toolkits/google_drive_mcp_toolkit.py +54 -0
  363. camel/toolkits/google_maps_toolkit.py +2 -2
  364. camel/toolkits/google_scholar_toolkit.py +2 -2
  365. camel/toolkits/human_toolkit.py +36 -12
  366. camel/toolkits/hybrid_browser_toolkit/__init__.py +18 -0
  367. camel/toolkits/hybrid_browser_toolkit/config_loader.py +185 -0
  368. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +246 -0
  369. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +1973 -0
  370. camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
  371. camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +4589 -0
  372. camel/toolkits/hybrid_browser_toolkit/ts/package.json +33 -0
  373. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-scripts.js +125 -0
  374. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +1929 -0
  375. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +233 -0
  376. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +589 -0
  377. camel/toolkits/hybrid_browser_toolkit/ts/src/index.ts +7 -0
  378. camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
  379. camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
  380. camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
  381. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +129 -0
  382. camel/toolkits/hybrid_browser_toolkit/ts/tsconfig.json +27 -0
  383. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +319 -0
  384. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +1037 -0
  385. camel/toolkits/hybrid_browser_toolkit_py/__init__.py +17 -0
  386. camel/toolkits/hybrid_browser_toolkit_py/actions.py +575 -0
  387. camel/toolkits/hybrid_browser_toolkit_py/agent.py +311 -0
  388. camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +787 -0
  389. camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +490 -0
  390. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +2390 -0
  391. camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +233 -0
  392. camel/toolkits/hybrid_browser_toolkit_py/stealth_script.js +0 -0
  393. camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +1043 -0
  394. camel/toolkits/image_analysis_toolkit.py +3 -3
  395. camel/toolkits/image_generation_toolkit.py +390 -0
  396. camel/toolkits/jina_reranker_toolkit.py +195 -79
  397. camel/toolkits/klavis_toolkit.py +7 -3
  398. camel/toolkits/linkedin_toolkit.py +2 -2
  399. camel/toolkits/markitdown_toolkit.py +104 -0
  400. camel/toolkits/math_toolkit.py +66 -12
  401. camel/toolkits/mcp_toolkit.py +841 -600
  402. camel/toolkits/memory_toolkit.py +7 -3
  403. camel/toolkits/meshy_toolkit.py +2 -2
  404. camel/toolkits/message_agent_toolkit.py +608 -0
  405. camel/toolkits/message_integration.py +724 -0
  406. camel/toolkits/mineru_toolkit.py +2 -2
  407. camel/toolkits/minimax_mcp_toolkit.py +195 -0
  408. camel/toolkits/networkx_toolkit.py +2 -2
  409. camel/toolkits/note_taking_toolkit.py +277 -0
  410. camel/toolkits/notion_mcp_toolkit.py +224 -0
  411. camel/toolkits/notion_toolkit.py +2 -2
  412. camel/toolkits/open_api_specs/biztoc/__init__.py +2 -2
  413. camel/toolkits/open_api_specs/biztoc/ai-plugin.json +1 -1
  414. camel/toolkits/open_api_specs/coursera/__init__.py +2 -2
  415. camel/toolkits/open_api_specs/create_qr_code/__init__.py +2 -2
  416. camel/toolkits/open_api_specs/klarna/__init__.py +2 -2
  417. camel/toolkits/open_api_specs/nasa_apod/__init__.py +2 -2
  418. camel/toolkits/open_api_specs/outschool/__init__.py +2 -2
  419. camel/toolkits/open_api_specs/outschool/ai-plugin.json +1 -1
  420. camel/toolkits/open_api_specs/outschool/openapi.yaml +1 -1
  421. camel/toolkits/open_api_specs/outschool/paths/__init__.py +2 -2
  422. camel/toolkits/open_api_specs/outschool/paths/get_classes.py +2 -2
  423. camel/toolkits/open_api_specs/outschool/paths/search_teachers.py +2 -2
  424. camel/toolkits/open_api_specs/security_config.py +2 -2
  425. camel/toolkits/open_api_specs/speak/__init__.py +2 -2
  426. camel/toolkits/open_api_specs/web_scraper/__init__.py +2 -2
  427. camel/toolkits/open_api_specs/web_scraper/ai-plugin.json +1 -1
  428. camel/toolkits/open_api_specs/web_scraper/paths/__init__.py +2 -2
  429. camel/toolkits/open_api_specs/web_scraper/paths/scraper.py +2 -2
  430. camel/toolkits/open_api_toolkit.py +2 -2
  431. camel/toolkits/openbb_toolkit.py +7 -3
  432. camel/toolkits/origene_mcp_toolkit.py +56 -0
  433. camel/toolkits/page_script.js +86 -74
  434. camel/toolkits/playwright_mcp_toolkit.py +27 -32
  435. camel/toolkits/pptx_toolkit.py +790 -0
  436. camel/toolkits/pubmed_toolkit.py +2 -2
  437. camel/toolkits/pulse_mcp_search_toolkit.py +2 -2
  438. camel/toolkits/pyautogui_toolkit.py +2 -2
  439. camel/toolkits/reddit_toolkit.py +2 -2
  440. camel/toolkits/resend_toolkit.py +168 -0
  441. camel/toolkits/retrieval_toolkit.py +2 -2
  442. camel/toolkits/screenshot_toolkit.py +213 -0
  443. camel/toolkits/search_toolkit.py +539 -146
  444. camel/toolkits/searxng_toolkit.py +2 -2
  445. camel/toolkits/semantic_scholar_toolkit.py +2 -2
  446. camel/toolkits/slack_toolkit.py +108 -58
  447. camel/toolkits/sql_toolkit.py +712 -0
  448. camel/toolkits/stripe_toolkit.py +2 -2
  449. camel/toolkits/sympy_toolkit.py +3 -3
  450. camel/toolkits/task_planning_toolkit.py +134 -0
  451. camel/toolkits/terminal_toolkit/__init__.py +18 -0
  452. camel/toolkits/terminal_toolkit/terminal_toolkit.py +1070 -0
  453. camel/toolkits/terminal_toolkit/utils.py +532 -0
  454. camel/toolkits/thinking_toolkit.py +3 -3
  455. camel/toolkits/twitter_toolkit.py +8 -3
  456. camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
  457. camel/toolkits/video_analysis_toolkit.py +112 -29
  458. camel/toolkits/video_download_toolkit.py +22 -16
  459. camel/toolkits/weather_toolkit.py +2 -2
  460. camel/toolkits/web_deploy_toolkit.py +1219 -0
  461. camel/toolkits/wechat_official_toolkit.py +483 -0
  462. camel/toolkits/whatsapp_toolkit.py +2 -2
  463. camel/toolkits/wolfram_alpha_toolkit.py +53 -25
  464. camel/toolkits/zapier_toolkit.py +7 -3
  465. camel/types/__init__.py +4 -4
  466. camel/types/agents/__init__.py +2 -2
  467. camel/types/agents/tool_calling_record.py +6 -3
  468. camel/types/enums.py +454 -35
  469. camel/types/mcp_registries.py +2 -2
  470. camel/types/openai_types.py +4 -4
  471. camel/types/unified_model_type.py +43 -6
  472. camel/utils/__init__.py +20 -2
  473. camel/utils/async_func.py +2 -2
  474. camel/utils/chunker/__init__.py +2 -2
  475. camel/utils/chunker/base.py +2 -2
  476. camel/utils/chunker/code_chunker.py +2 -2
  477. camel/utils/chunker/uio_chunker.py +2 -2
  478. camel/utils/commons.py +65 -7
  479. camel/utils/constants.py +5 -2
  480. camel/utils/context_utils.py +1134 -0
  481. camel/utils/deduplication.py +2 -2
  482. camel/utils/filename.py +2 -2
  483. camel/utils/langfuse.py +258 -0
  484. camel/utils/mcp.py +140 -6
  485. camel/utils/mcp_client.py +1056 -0
  486. camel/utils/message_summarizer.py +148 -0
  487. camel/utils/response_format.py +2 -2
  488. camel/utils/token_counting.py +45 -22
  489. camel/utils/tool_result.py +44 -0
  490. camel/verifiers/__init__.py +2 -2
  491. camel/verifiers/base.py +2 -2
  492. camel/verifiers/math_verifier.py +2 -2
  493. camel/verifiers/models.py +2 -2
  494. camel/verifiers/physics_verifier.py +2 -2
  495. camel/verifiers/python_verifier.py +2 -2
  496. {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/METADATA +349 -108
  497. camel_ai-0.2.82.dist-info/RECORD +507 -0
  498. {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/WHEEL +1 -1
  499. {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/licenses/LICENSE +1 -1
  500. camel/loaders/pandas_reader.py +0 -368
  501. camel/runtime/api.py +0 -97
  502. camel/toolkits/dalle_toolkit.py +0 -171
  503. camel/toolkits/file_write_toolkit.py +0 -395
  504. camel/toolkits/openai_agent_toolkit.py +0 -135
  505. camel/toolkits/terminal_toolkit.py +0 -1037
  506. camel_ai-0.2.59.dist-info/RECORD +0 -410
@@ -0,0 +1,1929 @@
1
+ import { Page, Browser, BrowserContext, chromium, ConsoleMessage, Frame } from 'playwright';
2
+ import { BrowserToolkitConfig, SnapshotResult, SnapshotElement, ActionResult, TabInfo, BrowserAction, DetailedTiming } from './types';
3
+ import { ConfigLoader, StealthConfig } from './config-loader';
4
+
5
+ export class HybridBrowserSession {
6
+ private browser: Browser | null = null;
7
+ private context: BrowserContext | null = null;
8
+ private contextOwnedByUs: boolean = false;
9
+ private pages: Map<string, Page> = new Map();
10
+ private consoleLogs: Map<string, ConsoleMessage[]> = new Map();
11
+ private currentTabId: string | null = null;
12
+ private tabCounter = 0;
13
+ private configLoader: ConfigLoader;
14
+ private scrollPosition: { x: number; y: number } = {x: 0, y: 0};
15
+ private hasNavigatedBefore = false; // Track if we've navigated before
16
+ private logLimit: number;
17
+
18
+ constructor(config: BrowserToolkitConfig = {}) {
19
+ // Build this session's ConfigLoader from the incoming config (an empty object when omitted); ConfigLoader.fromPythonConfig handles the conversion
20
+ this.configLoader = ConfigLoader.fromPythonConfig(config);
21
+ // Console log cap taken from the browser config; because of `||`, any falsy value (undefined, 0) falls back to 1000
22
+ this.logLimit = this.configLoader.getBrowserConfig().consoleLogLimit || 1000;
23
+ }
24
+
25
+ private registerNewPage(tabId: string, page: Page): void {
26
+ // Track `page` under `tabId` in this.pages and give the tab a fresh, empty console-log buffer
27
+ this.pages.set(tabId, page);
28
+ this.consoleLogs.set(tabId, []);
29
+ // Append every console message to this tab's buffer, dropping the oldest entry once the buffer grows past this.logLimit; messages are ignored if the buffer has already been deleted
30
+ page.on('console', (msg: ConsoleMessage) => {
31
+ const logs = this.consoleLogs.get(tabId);
32
+ if (logs) {
33
+ logs.push(msg);
34
+ if (logs.length > this.logLimit) {
35
+ logs.shift();
36
+ }
37
+ }
38
+ });
39
+
40
+ // When the page closes, delete this tab's log buffer. NOTE(review): the this.pages entry is not removed here - presumably cleaned up elsewhere; confirm
41
+ page.on('close', () => {
42
+ this.consoleLogs.delete(tabId);
43
+ });
44
+ }
45
+
46
+ async ensureBrowser(): Promise<void> {
47
+ if (this.browser) {
48
+ return;
49
+ }
50
+
51
+ const browserConfig = this.configLoader.getBrowserConfig();
52
+ const stealthConfig = this.configLoader.getStealthConfig();
53
+
54
+ // Check if CDP URL is provided
55
+ if (browserConfig.cdpUrl) {
56
+ // Connect to existing browser via CDP
57
+ this.browser = await chromium.connectOverCDP(browserConfig.cdpUrl);
58
+
59
+ // Get existing contexts or create new one
60
+ const contexts = this.browser.contexts();
61
+ if (contexts.length > 0) {
62
+ this.context = contexts[0];
63
+ this.contextOwnedByUs = false;
64
+
65
+ // Apply stealth headers to existing context if configured
66
+ // Note: userAgent cannot be changed on an existing context
67
+ if (stealthConfig.enabled) {
68
+ if (stealthConfig.extraHTTPHeaders) {
69
+ await this.context.setExtraHTTPHeaders(stealthConfig.extraHTTPHeaders);
70
+ }
71
+ if (stealthConfig.userAgent) {
72
+ console.warn('[HybridBrowserSession] Cannot apply userAgent to existing context. Consider creating a new context if userAgent customization is required.');
73
+ }
74
+ }
75
+ } else {
76
+ const contextOptions: any = {
77
+ viewport: browserConfig.viewport
78
+ };
79
+
80
+ // Apply stealth headers and UA if configured
81
+ if (stealthConfig.enabled) {
82
+ if (stealthConfig.extraHTTPHeaders) {
83
+ contextOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
84
+ }
85
+ if (stealthConfig.userAgent) {
86
+ contextOptions.userAgent = stealthConfig.userAgent;
87
+ }
88
+ }
89
+
90
+ this.context = await this.browser.newContext(contextOptions);
91
+ this.contextOwnedByUs = true;
92
+ this.browser = this.context.browser();
93
+ }
94
+
95
+ const pages = this.context.pages();
96
+ console.log(`[CDP] cdpKeepCurrentPage: ${browserConfig.cdpKeepCurrentPage}, pages count: ${pages.length}`);
97
+ if (browserConfig.cdpKeepCurrentPage) {
98
+ // Use existing page without creating new ones
99
+ if (pages.length > 0) {
100
+ // Find first non-closed page
101
+ let validPage: Page | null = null;
102
+ for (const page of pages) {
103
+ if (!page.isClosed()) {
104
+ validPage = page;
105
+ break;
106
+ }
107
+ }
108
+
109
+ if (validPage) {
110
+ const tabId = this.generateTabId();
111
+ this.registerNewPage(tabId, validPage);
112
+ this.currentTabId = tabId;
113
+ console.log(`[CDP] cdpKeepCurrentPage mode: using existing page as initial tab: ${tabId}, URL: ${validPage.url()}`);
114
+ } else {
115
+ throw new Error('No active pages available in CDP mode with cdpKeepCurrentPage=true (all pages are closed)');
116
+ }
117
+ } else {
118
+ throw new Error('No pages available in CDP mode with cdpKeepCurrentPage=true');
119
+ }
120
+ } else {
121
+ // Look for blank pages or create new ones
122
+ if (pages.length > 0) {
123
+ // Find one available blank page
124
+ let availablePageFound = false;
125
+ for (const page of pages) {
126
+ const pageUrl = page.url();
127
+ if (this.isBlankPageUrl(pageUrl)) {
128
+ const tabId = this.generateTabId();
129
+ this.registerNewPage(tabId, page);
130
+ this.currentTabId = tabId;
131
+ availablePageFound = true;
132
+ console.log(`[CDP] Registered blank page as initial tab: ${tabId}, URL: ${pageUrl}`);
133
+ break;
134
+ }
135
+ }
136
+
137
+ if (!availablePageFound) {
138
+ console.log('[CDP] No blank pages found, creating new page');
139
+ const newPage = await this.context.newPage();
140
+ const tabId = this.generateTabId();
141
+ this.registerNewPage(tabId, newPage);
142
+ this.currentTabId = tabId;
143
+ }
144
+ } else {
145
+ console.log('[CDP] No existing pages, creating initial page');
146
+ const newPage = await this.context.newPage();
147
+ const tabId = this.generateTabId();
148
+ this.registerNewPage(tabId, newPage);
149
+ this.currentTabId = tabId;
150
+ }
151
+ }
152
+ } else {
153
+ // Original launch logic
154
+ const launchOptions: any = {
155
+ headless: browserConfig.headless,
156
+ };
157
+
158
+ if (stealthConfig.enabled) {
159
+ launchOptions.args = stealthConfig.args || [];
160
+
161
+ // Apply stealth user agent/headers if configured
162
+ if (stealthConfig.userAgent) {
163
+ launchOptions.userAgent = stealthConfig.userAgent;
164
+ }
165
+ if (stealthConfig.extraHTTPHeaders) {
166
+ launchOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
167
+ }
168
+ }
169
+
170
+ if (browserConfig.userDataDir) {
171
+ // Ensure viewport is honored in persistent context
172
+ launchOptions.viewport = browserConfig.viewport;
173
+ this.context = await chromium.launchPersistentContext(
174
+ browserConfig.userDataDir,
175
+ launchOptions
176
+ );
177
+ this.contextOwnedByUs = true;
178
+ this.browser = this.context.browser();
179
+ const pages = this.context.pages();
180
+ if (pages.length > 0) {
181
+ const initialTabId = this.generateTabId();
182
+ this.registerNewPage(initialTabId, pages[0]);
183
+ this.currentTabId = initialTabId;
184
+ }
185
+ } else {
186
+ this.browser = await chromium.launch(launchOptions);
187
+ const contextOptions: any = {
188
+ viewport: browserConfig.viewport
189
+ };
190
+
191
+ // Apply stealth headers and UA if configured
192
+ if (stealthConfig.enabled) {
193
+ if (stealthConfig.extraHTTPHeaders) {
194
+ contextOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
195
+ }
196
+ if (stealthConfig.userAgent) {
197
+ contextOptions.userAgent = stealthConfig.userAgent;
198
+ }
199
+ }
200
+
201
+ this.context = await this.browser.newContext(contextOptions);
202
+ this.contextOwnedByUs = true;
203
+
204
+ const initialPage = await this.context.newPage();
205
+ const initialTabId = this.generateTabId();
206
+ this.registerNewPage(initialTabId, initialPage);
207
+ this.currentTabId = initialTabId;
208
+ }
209
+ }
210
+
211
+ // Set timeouts
212
+ for (const page of this.pages.values()) {
213
+ page.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
214
+ page.setDefaultTimeout(browserConfig.navigationTimeout);
215
+ }
216
+ }
217
+
218
/**
 * Produce the next tab identifier: the configured prefix followed by the
 * incremented tab counter, left-padded with zeros to the configured width.
 */
private generateTabId(): string {
  const { tabIdPrefix, tabCounterPadding } = this.configLoader.getBrowserConfig();
  this.tabCounter += 1;
  const paddedCounter = String(this.tabCounter).padStart(tabCounterPadding, '0');
  return `${tabIdPrefix}${paddedCounter}`;
}
222
+
223
/**
 * Decide whether a URL denotes a blank page.
 *
 * A URL counts as blank when it is `about:blank` (optionally followed by a
 * query string), exactly matches one of the configured blank page URLs, is
 * the empty string, or starts with the configured data-URL prefix
 * (`data:` when none is configured).
 */
private isBlankPageUrl(url: string): boolean {
  const browserConfig = this.configLoader.getBrowserConfig();

  // Standard about:blank variations (prefix match allows query params).
  if (url === 'about:blank' || url.startsWith('about:blank?')) {
    return true;
  }
  // Configured blank page URLs are compared by exact match.
  if (browserConfig.blankPageUrls.includes(url)) {
    return true;
  }
  if (url === '') {
    return true;
  }
  // Data URLs are treated as blank pages.
  const dataPrefix = browserConfig.dataUrlPrefix || 'data:';
  return url.startsWith(dataPrefix);
}
238
+
239
/**
 * Return the page backing the current tab, recovering when none is tracked.
 *
 * When `currentTabId` points at a registered page that page is returned.
 * Otherwise:
 *  - In CDP keep-current-page mode (`cdpKeepCurrentPage` and `cdpUrl` set and
 *    a context present) no page is ever created. The first open page of the
 *    context that is not yet tracked is registered and selected; failing
 *    that, the first open page that is already tracked is selected.
 *  - In every other mode a new page is created in the context, registered,
 *    given the configured default timeouts and selected.
 *
 * @returns The page that is now the current tab.
 * @throws Error when keep-current-page mode finds no usable open page, or
 *         when no browser context is available.
 */
async getCurrentPage(): Promise<Page> {
  if (this.currentTabId && this.pages.has(this.currentTabId)) {
    return this.pages.get(this.currentTabId)!;
  }

  const browserConfig = this.configLoader.getBrowserConfig();

  // In CDP keep-current-page mode, find an existing page instead of creating one
  if (browserConfig.cdpKeepCurrentPage && browserConfig.cdpUrl && this.context) {
    const allPages = this.context.pages();
    console.log(`[getCurrentPage] cdpKeepCurrentPage mode: Looking for existing page, found ${allPages.length} pages`);

    // Reverse lookup built once; the first tab id registered for a page wins.
    const tabIdByPage = new Map<Page, string>();
    for (const [tabId, page] of this.pages.entries()) {
      if (!tabIdByPage.has(page)) {
        tabIdByPage.set(page, tabId);
      }
    }

    // Prefer an open page that is not tracked yet
    const untrackedPage = allPages.find(page => !tabIdByPage.has(page) && !page.isClosed());
    if (untrackedPage) {
      const tabId = this.generateTabId();
      this.registerNewPage(tabId, untrackedPage);
      this.currentTabId = tabId;
      // Give adopted pages the same default timeouts as pages created in normal mode
      untrackedPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
      untrackedPage.setDefaultTimeout(browserConfig.navigationTimeout);
      console.log(`[getCurrentPage] cdpKeepCurrentPage mode: Found and registered untracked page: ${tabId}`);
      return untrackedPage;
    }

    // All open pages are tracked: use the first one that is still open.
    // Scanning every page (not just the first) avoids failing when the
    // first page of the context happens to be closed.
    for (const page of allPages) {
      if (page.isClosed()) {
        continue;
      }
      const tabId = tabIdByPage.get(page);
      if (tabId !== undefined) {
        this.currentTabId = tabId;
        console.log(`[getCurrentPage] cdpKeepCurrentPage mode: Using existing tracked page: ${tabId}`);
        return page;
      }
    }

    throw new Error('No active page available in CDP mode with cdpKeepCurrentPage=true');
  }

  // Normal mode: create new page
  if (!this.context) {
    throw new Error('No browser context available');
  }

  console.log('[getCurrentPage] No active page, creating new page');
  const newPage = await this.context.newPage();
  const tabId = this.generateTabId();
  this.registerNewPage(tabId, newPage);
  this.currentTabId = tabId;

  newPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
  newPage.setDefaultTimeout(browserConfig.navigationTimeout);

  return newPage;
}
295
+
296
/**
 * Return the console messages recorded for the current tab, or an empty
 * array when there is no current tab or nothing is recorded for it.
 */
async getCurrentLogs(): Promise<ConsoleMessage[]> {
  const tabId = this.currentTabId;
  const recorded = tabId ? this.consoleLogs.get(tabId) : undefined;
  return recorded || [];
}
302
+
303
/**
 * Read the current scroll offset of the active page and cache it.
 *
 * The offset is read inside the page from `window.pageXOffset/pageYOffset`,
 * falling back to the document element's `scrollLeft/scrollTop`, then 0.
 * The result is stored in `this.scrollPosition`.
 *
 * @returns The freshly read scroll position, or the previously cached one
 *          when reading fails (the failure is logged as a warning).
 */
private async getCurrentScrollPosition(): Promise<{ x: number; y: number }> {
  try {
    const page = await this.getCurrentPage();
    // Only the offsets are needed, so nothing else is computed in the page.
    const { x, y } = await page.evaluate(() => ({
      x: window.pageXOffset || document.documentElement.scrollLeft || 0,
      y: window.pageYOffset || document.documentElement.scrollTop || 0,
    })) as { x: number; y: number };

    // Store scroll position
    this.scrollPosition = { x, y };
    return this.scrollPosition;
  } catch (error) {
    console.warn('Failed to get scroll position:', error);
    return this.scrollPosition;
  }
}
326
+
327
/**
 * Capture an AI-oriented snapshot of the current page.
 *
 * Delegates unconditionally to the native Playwright mapping implementation.
 *
 * @param includeCoordinates Forwarded to the native implementation.
 * @param viewportLimit Forwarded to the native implementation.
 */
async getSnapshotForAI(includeCoordinates = false, viewportLimit = false): Promise<SnapshotResult & { timing: DetailedTiming }> {
  const nativeResult = await this.getSnapshotForAINative(includeCoordinates, viewportLimit);
  return nativeResult;
}
331
+
332
+
333
/**
 * Map every ref found in a snapshot to the role written on its line.
 *
 * Only the first `[ref=...]` on each line is considered. The role is the
 * first run of letters, digits, `_` or `-` after optional indentation and an
 * optional leading dash, lower-cased; it is `undefined` when the line does
 * not start that way. A ref seen on several lines keeps the last role.
 */
private buildSnapshotIndex(snapshotText: string): Map<string, string|undefined> {
  const roleByRef = new Map<string, string|undefined>();
  snapshotText.split('\n').forEach((row) => {
    const refHit = /\[ref=([^\]]+)\]/i.exec(row);
    if (refHit === null) {
      return;
    }
    const roleHit = /^\s*-?\s*([a-z0-9_-]+)/i.exec(row);
    roleByRef.set(refHit[1], roleHit ? roleHit[1].toLowerCase() : undefined);
  });
  return roleByRef;
}
347
+
348
+
349
/**
 * Keep only the elements whose bounding box overlaps the viewport.
 *
 * Coordinates are compared directly against the rectangle
 * (0, 0)-(viewport.width, viewport.height), i.e. they are treated as
 * viewport-relative, so `scrollPos` is accepted but not used. Elements
 * without coordinates are always kept as a fallback.
 *
 * @param elements Elements keyed by ref.
 * @param viewport Viewport size in pixels.
 * @param scrollPos Current scroll offset (unused).
 * @returns A new record with the retained elements.
 */
private filterElementsInViewport(
  elements: Record<string, SnapshotElement>,
  viewport: { width: number, height: number },
  scrollPos: { x: number, y: number }
): Record<string, SnapshotElement> {
  const visible: Record<string, SnapshotElement> = {};

  for (const ref of Object.keys(elements)) {
    const candidate = elements[ref];
    const box = candidate.coordinates;

    // No coordinates available: keep the element rather than guess
    if (!box) {
      visible[ref] = candidate;
      continue;
    }

    // Overlap test per axis against the viewport rectangle
    const overlapsHorizontally = box.x < viewport.width && box.x + box.width > 0;
    const overlapsVertically = box.y < viewport.height && box.y + box.height > 0;

    if (overlapsHorizontally && overlapsVertically) {
      visible[ref] = candidate;
    }
  }

  return visible;
}
388
+
389
+
390
/**
 * Reduce snapshot lines to those whose ref is in `viewportRefs`, plus
 * nearby lines that carry no ref.
 *
 * A stack holds the indentation levels of kept ref lines. For each line the
 * level difference is measured against the top of that stack (0 when empty)
 * before the stack is unwound to the line's level. Lines with a kept ref are
 * emitted and pushed; lines with any other ref are dropped; lines without a
 * ref are emitted when that level difference is exactly 0 or 1.
 *
 * @param lines Snapshot split into lines.
 * @param viewportRefs Refs to keep.
 * @param tabSize Number of whitespace characters per indentation level.
 */
private filterSnapshotLines(
  lines: string[],
  viewportRefs: Set<string>,
  tabSize: number = 2
): string[] {
  const keptLevels: number[] = [];
  const keptLines: string[] = [];

  for (let i = 0; i < lines.length; i++) {
    const row = lines[i];
    const refHit = row.match(/\[ref=([^\]]+)\]/);
    const indentHit = row.match(/^(\s*)/);
    const depth = indentHit ? indentHit[1].length / tabSize : 0;

    // Difference is taken against the stack top as it was before unwinding
    const topBefore = keptLevels.length > 0 ? keptLevels[keptLevels.length - 1] : 0;
    const delta = depth - topBefore;

    // Unwind to the parent level when moving sideways or upwards
    if (delta <= 0) {
      while (keptLevels.length > 0 && keptLevels[keptLevels.length - 1] >= depth) {
        keptLevels.pop();
      }
    }

    if (refHit) {
      // Ref lines are kept only when their ref was requested
      if (viewportRefs.has(refHit[1])) {
        keptLevels.push(depth);
        keptLines.push(row);
      }
      continue;
    }

    // Ref-less line: keep headers and direct children of a tracked element
    if (delta === 0 || delta === 1) {
      keptLines.push(row);
    }
  }

  return keptLines;
}
427
+
428
/**
 * Rebuild snapshot text so it only contains the lines retained by
 * `filterSnapshotLines` for the refs present in `filteredElements`.
 */
private rebuildSnapshotText(
  originalSnapshot: string,
  filteredElements: Record<string, SnapshotElement>): string {
  const keptRefs = new Set(Object.keys(filteredElements));
  const sourceLines = originalSnapshot.split('\n');
  return this.filterSnapshotLines(sourceLines, keptRefs).join('\n');
}
436
+
437
+
438
/**
 * Capture an aria snapshot of the current page and map its refs to roles
 * and, when needed, bounding boxes.
 *
 * Bounding boxes are collected when either coordinates were requested or
 * viewport filtering is on, because the viewport filter can only drop
 * elements that have coordinates. If the caller did not ask for
 * coordinates they are removed again from the returned elements.
 *
 * @param includeCoordinates Attach rounded bounding-box coordinates to each
 *        returned element.
 * @param viewportLimit Restrict elements and snapshot text to what overlaps
 *        the viewport.
 * @returns Snapshot text, element mapping, metadata and timing. On failure
 *          an error placeholder snapshot with an empty mapping is returned
 *          instead of throwing.
 */
private async getSnapshotForAINative(includeCoordinates = false, viewportLimit = false): Promise<SnapshotResult & { timing: DetailedTiming }> {
  const startTime = Date.now();
  const page = await this.getCurrentPage();

  try {
    // Use _snapshotForAI() to properly update _lastAriaSnapshot
    const snapshotStart = Date.now();
    const snapshotText = await (page as any)._snapshotForAI();
    const snapshotTime = Date.now() - snapshotStart;

    // Extract refs from the snapshot text
    const refPattern = /\[ref=([^\]]+)\]/g;
    const refs: string[] = [];
    let match;
    while ((match = refPattern.exec(snapshotText)) !== null) {
      refs.push(match[1]);
    }

    // Get element information including coordinates if needed
    const mappingStart = Date.now();
    const playwrightMapping: Record<string, any> = {};

    // Parse element info in a single pass
    const snapshotIndex = this.buildSnapshotIndex(snapshotText);
    for (const ref of refs) {
      playwrightMapping[ref] = {
        ref,
        role: snapshotIndex.get(ref) || 'unknown',
      };
    }

    // Viewport filtering is a no-op without coordinates, so gather them
    // whenever filtering was requested as well.
    const needCoordinates = includeCoordinates || viewportLimit;
    if (needCoordinates) {
      // Get coordinates for each ref using aria-ref selector
      for (const ref of refs) {
        try {
          const element = page.locator(`aria-ref=${ref}`).first();
          if ((await element.count()) === 0) {
            continue;
          }

          const boundingBox = await element.boundingBox();
          if (boundingBox) {
            // Add coordinates to existing element info
            playwrightMapping[ref] = {
              ...playwrightMapping[ref],
              coordinates: {
                x: Math.round(boundingBox.x),
                y: Math.round(boundingBox.y),
                width: Math.round(boundingBox.width),
                height: Math.round(boundingBox.height)
              }
            };
          }
        } catch (error) {
          console.warn(`Failed to get coordinates for ref ${ref}:`, error);
        }
      }
    }

    const mappingTime = Date.now() - mappingStart;

    // Apply viewport filtering if requested
    let finalElements = playwrightMapping;
    let finalSnapshot = snapshotText;

    if (viewportLimit) {
      const viewport = page.viewportSize() || { width: 1280, height: 720 };
      const scrollPos = await this.getCurrentScrollPosition();
      finalElements = this.filterElementsInViewport(playwrightMapping, viewport, scrollPos);
      finalSnapshot = this.rebuildSnapshotText(snapshotText, finalElements);

      if (!includeCoordinates) {
        // Coordinates were only gathered for filtering; keep the returned
        // element shape the same as when they are not requested.
        const withoutCoordinates: Record<string, any> = {};
        for (const [ref, info] of Object.entries(finalElements)) {
          const { coordinates: _omitted, ...rest } = info;
          withoutCoordinates[ref] = rest;
        }
        finalElements = withoutCoordinates;
      }
    }

    const totalTime = Date.now() - startTime;

    return {
      snapshot: finalSnapshot,
      elements: finalElements,
      metadata: {
        elementCount: Object.keys(finalElements).length,
        url: page.url(),
        timestamp: new Date().toISOString(),
      },
      timing: {
        total_time_ms: totalTime,
        snapshot_time_ms: snapshotTime,
        coordinate_enrichment_time_ms: 0, // Integrated into mapping
        aria_mapping_time_ms: mappingTime,
      },
    };
  } catch (error) {
    console.error('Failed to get AI snapshot with native mapping:', error);
    const totalTime = Date.now() - startTime;

    return {
      snapshot: 'Error: Unable to capture page snapshot',
      elements: {},
      metadata: {
        elementCount: 0,
        url: page.url(),
        timestamp: new Date().toISOString(),
      },
      timing: {
        total_time_ms: totalTime,
        snapshot_time_ms: 0,
        coordinate_enrichment_time_ms: 0,
        aria_mapping_time_ms: 0,
      },
    };
  }
}
552
+
553
+
554
+
555
/**
 * Click the element identified by an aria ref, with new-tab detection.
 *
 * Navigable links, and elements that open a new tab on their own, are
 * clicked while listening for a new page in the context. Links that would
 * not open a new tab get their `target` attribute set to the configured
 * blank target first. If a page opens within the popup timeout it is
 * registered as a new tab, given the default timeouts and made the current
 * tab. For comboboxes and text inputs the snapshot is compared before and
 * after the click and newly appeared `option`/`menuitem` lines are returned
 * as `diffSnapshot`.
 *
 * @param page Page containing the element.
 * @param ref Aria ref of the element to click.
 * @returns `success` plus the method used, the new tab id when a tab was
 *          opened, and an optional diff snapshot. Failures are reported
 *          through `success: false` and `error` rather than thrown.
 */
private async performClick(page: Page, ref: string): Promise<{ success: boolean; method?: string; error?: string; newTabId?: string; diffSnapshot?: string }> {
  try {
    // Ensure we have the latest snapshot and mapping
    await (page as any)._snapshotForAI();

    // Use Playwright's aria-ref selector engine
    const element = page.locator(`aria-ref=${ref}`).first();
    if ((await element.count()) === 0) {
      return { success: false, error: `Element with ref ${ref} not found` };
    }

    const browserConfig = this.configLoader.getBrowserConfig();
    const role = await element.getAttribute('role');
    // Evaluated once and reused for both the diff decision and link detection
    const tagName = await element.evaluate(el => el.tagName.toLowerCase());
    const isCombobox = role === 'combobox' || tagName === 'combobox';
    const isTextbox = role === 'textbox' || tagName === 'input' || tagName === 'textarea';
    const shouldCheckDiff = isCombobox || isTextbox;

    let snapshotBefore: string | null = null;
    let comboboxAriaLabel: string | null = null;
    if (shouldCheckDiff) {
      snapshotBefore = await (page as any)._snapshotForAI();
      // Capture aria-label for combobox to find it again after click (ref may change)
      if (isCombobox) {
        comboboxAriaLabel = await element.getAttribute('aria-label');
        if (!comboboxAriaLabel) {
          // Try to get accessible name from aria-labelledby or inner text
          comboboxAriaLabel = await element.evaluate(el => {
            const labelledBy = el.getAttribute('aria-labelledby');
            if (labelledBy) {
              // aria-labelledby may list several ids separated by whitespace
              const labelEls = labelledBy
                .split(/\s+/)
                .filter(id => id.length > 0)
                .map(id => document.getElementById(id))
                .filter((labelEl): labelEl is HTMLElement => labelEl !== null);
              if (labelEls.length > 0) {
                const joined = labelEls
                  .map(labelEl => labelEl.textContent?.trim() || '')
                  .filter(text => text.length > 0)
                  .join(' ');
                return joined || null;
              }
            }
            return el.textContent?.trim() || null;
          });
        }
      }
    }

    // Check element properties
    const target = await element.getAttribute(browserConfig.targetAttribute);
    const href = await element.getAttribute(browserConfig.hrefAttribute);
    const onclick = await element.getAttribute(browserConfig.onclickAttribute);

    // Check if element naturally opens new tab
    const naturallyOpensNewTab = (
      target === browserConfig.blankTarget ||
      (onclick && onclick.includes(browserConfig.windowOpenString)) ||
      (tagName === 'a' && href && (href.includes(`javascript:${browserConfig.windowOpenString}`) || href.includes(browserConfig.blankTarget)))
    );

    // Open ALL links in new tabs
    // Check if this is a navigable link
    const isNavigableLink = tagName === 'a' && href &&
      !href.startsWith(browserConfig.anchorOnly) && // Not an anchor link
      !href.startsWith(browserConfig.javascriptVoidPrefix) && // Not a void javascript
      href !== browserConfig.javascriptVoidEmpty && // Not empty javascript
      href !== browserConfig.anchorOnly; // Not just #

    const shouldOpenNewTab = naturallyOpensNewTab || isNavigableLink;

    if (shouldOpenNewTab) {
      // If it's a link that doesn't naturally open in new tab, force it
      if (isNavigableLink && !naturallyOpensNewTab) {
        await element.evaluate((el, blankTarget) => {
          if (el.tagName.toLowerCase() === 'a') {
            el.setAttribute('target', blankTarget);
          }
        }, browserConfig.blankTarget);
      }

      // Set up popup listener before clicking. The rejection is handled
      // right here so that a failing click cannot leave this promise to
      // reject later without a handler.
      const popupPromise: Promise<Page | null> = page.context()
        .waitForEvent('page', { timeout: browserConfig.popupTimeout })
        .catch(() => null);

      // Click with force to avoid scrolling issues
      await element.click({ force: browserConfig.forceClick });

      // Wait for new page to open (null when none appeared in time)
      const newPage = await popupPromise;
      if (!newPage) {
        return { success: true, method: 'playwright-aria-ref' };
      }

      try {
        // Generate tab ID for the new page
        const newTabId = this.generateTabId();
        this.registerNewPage(newTabId, newPage);

        // Set up page properties
        newPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
        newPage.setDefaultTimeout(browserConfig.navigationTimeout);

        // Automatically switch to the new tab
        this.currentTabId = newTabId;
        await newPage.bringToFront();

        // Wait for new page to be ready
        await newPage.waitForLoadState('domcontentloaded', { timeout: browserConfig.popupTimeout }).catch(() => {});

        return { success: true, method: 'playwright-aria-ref-newtab', newTabId };
      } catch (popupError) {
        // The click itself succeeded; report it without a new tab id
        return { success: true, method: 'playwright-aria-ref' };
      }
    }

    // Regular click; force option prevents scrolling issues
    await element.click({ force: browserConfig.forceClick });

    if (shouldCheckDiff && snapshotBefore) {
      // Short pause so content triggered by the click has a chance to render
      await page.waitForTimeout(300);
      const snapshotAfter = await (page as any)._snapshotForAI();
      let diffSnapshot = this.getSnapshotDiff(snapshotBefore, snapshotAfter, ['option', 'menuitem']);

      // For combobox, find the new ref based on aria-label and prepend to diffSnapshot
      if (isCombobox && comboboxAriaLabel) {
        const newComboboxRef = this.findComboboxRefByAriaLabel(snapshotAfter, comboboxAriaLabel);
        if (newComboboxRef) {
          // Find the full line for this combobox in the snapshot
          const comboboxLine = this.findSnapshotLineByRef(snapshotAfter, newComboboxRef);
          if (comboboxLine) {
            diffSnapshot = comboboxLine + (diffSnapshot ? '\n' + diffSnapshot : '');
          }
        }
      }

      if (diffSnapshot && diffSnapshot.trim() !== '') {
        return { success: true, method: 'playwright-aria-ref', diffSnapshot };
      }
    }

    return { success: true, method: 'playwright-aria-ref' };
  } catch (error) {
    console.error('[performClick] Exception during click for ref: %s', ref, error);
    return { success: false, error: `Click failed with exception: ${error}` };
  }
}
704
+
705
/**
 * Extract the lines of `snapshotAfter` that introduce a new ref and whose
 * role is one of `targetRoles`.
 *
 * A ref is new when it does not occur anywhere in `snapshotBefore`. The role
 * is matched case-insensitively as a whole word, and only in the part of the
 * line before the accessible name or attribute list (the first `"` or `[`),
 * so a role word inside an element's name or text does not count.
 *
 * @param snapshotBefore Snapshot text taken before the action.
 * @param snapshotAfter Snapshot text taken after the action.
 * @param targetRoles Roles of interest, e.g. `['option', 'menuitem']`.
 * @returns The matching lines, trimmed and joined with newlines, or an empty
 *          string when there are none.
 */
private getSnapshotDiff(snapshotBefore: string, snapshotAfter: string, targetRoles: string[]): string {
  const refsBefore = new Set<string>();
  const refPattern = /\[ref=([^\]]+)\]/g;
  let match;
  while ((match = refPattern.exec(snapshotBefore)) !== null) {
    refsBefore.add(match[1]);
  }

  // Compile one escaped pattern per role up front instead of per line
  const rolePatterns = targetRoles.map(role => {
    const escapedRole = role.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp(`\\b${escapedRole}\\b`, 'i');
  });

  const newElements: string[] = [];
  for (const line of snapshotAfter.split('\n')) {
    const refMatch = line.match(/\[ref=([^\]]+)\]/);
    if (!refMatch || refsBefore.has(refMatch[1])) {
      continue;
    }

    // The role precedes the quoted name and the bracketed attributes
    const roleSegment = line.split(/["\[]/, 1)[0];
    if (rolePatterns.some(pattern => pattern.test(roleSegment))) {
      newElements.push(line.trim());
    }
  }

  return newElements.join('\n');
}
739
+
740
/**
 * Locate a combobox line in the snapshot and return its ref.
 *
 * First pass: a line mentioning `combobox` that contains the label, either
 * quoted (case-insensitive) or verbatim. Second pass, used when the first
 * finds nothing: a combobox line marked `[expanded]`.
 *
 * @param snapshot Snapshot text to search.
 * @param ariaLabel Label of the combobox to find.
 * @returns The ref of the first matching line, or `null`.
 */
private findComboboxRefByAriaLabel(snapshot: string, ariaLabel: string): string | null {
  // Escape special regex characters in ariaLabel
  const escapedLabel = ariaLabel.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const quotedLabel = new RegExp(`["']${escapedLabel}["']`, 'i');

  const refOf = (row: string): string | null => {
    const hit = row.match(/\[ref=([^\]]+)\]/);
    return hit ? hit[1] : null;
  };

  const comboboxRows = snapshot
    .split('\n')
    .filter(row => /\bcombobox\b/i.test(row));

  // First, try to find by aria-label
  for (const row of comboboxRows) {
    if (quotedLabel.test(row) || row.includes(ariaLabel)) {
      const found = refOf(row);
      if (found) {
        return found;
      }
    }
  }

  // Fallback: find the expanded combobox (since we just clicked it)
  for (const row of comboboxRows) {
    if (/\[expanded\]/i.test(row)) {
      const found = refOf(row);
      if (found) {
        return found;
      }
    }
  }

  return null;
}
777
+
778
/**
 * Return the first snapshot line containing `[ref=<ref>]`, trimmed, or
 * `null` when no line carries that ref.
 */
private findSnapshotLineByRef(snapshot: string, ref: string): string | null {
  const marker = `[ref=${ref}]`;
  const hit = snapshot.split('\n').find(row => row.includes(marker));
  return hit === undefined ? null : hit.trim();
}
790
+
791
+ /**
792
+ * Simplified type implementation using Playwright's aria-ref selector
793
+ * Supports both single and multiple input operations
794
+ */
795
+ private async performType(page: Page, ref: string | undefined, text: string | undefined, inputs?: Array<{ ref: string; text: string }>): Promise<{ success: boolean; error?: string; details?: Record<string, any>; diffSnapshot?: string }> {
796
+ try {
797
+ // Ensure we have the latest snapshot
798
+ await (page as any)._snapshotForAI();
799
+
800
+ // Handle multiple inputs if provided
801
+ if (inputs && inputs.length > 0) {
802
+ const results: Record<string, { success: boolean; error?: string }> = {};
803
+
804
+ for (const input of inputs) {
805
+ const singleResult = await this.performType(page, input.ref, input.text);
806
+ results[input.ref] = {
807
+ success: singleResult.success,
808
+ error: singleResult.error
809
+ };
810
+ }
811
+
812
+ // Check if all inputs were successful
813
+ const allSuccess = Object.values(results).every(r => r.success);
814
+ const errors = Object.entries(results)
815
+ .filter(([_, r]) => !r.success)
816
+ .map(([ref, r]) => `${ref}: ${r.error}`)
817
+ .join('; ');
818
+
819
+ return {
820
+ success: allSuccess,
821
+ error: allSuccess ? undefined : `Some inputs failed: ${errors}`,
822
+ details: results
823
+ };
824
+ }
825
+
826
+ // Handle single input (backward compatibility)
827
+ if (ref && text !== undefined) {
828
+ const selector = `aria-ref=${ref}`;
829
+ const element = await page.locator(selector).first();
830
+
831
+ const exists = await element.count() > 0;
832
+ if (!exists) {
833
+ return { success: false, error: `Element with ref ${ref} not found` };
834
+ }
835
+
836
+ // Get element attributes to check if it's readonly or a special input type
837
+ let originalPlaceholder: string | null = null;
838
+ let isReadonly = false;
839
+ let elementType: string | null = null;
840
+ let isCombobox = false;
841
+ let isTextbox = false;
842
+ let shouldCheckDiff = false;
843
+
844
+ try {
845
+ // Get element info in one evaluation to minimize interactions
846
+ const elementInfo = await element.evaluate((el: any) => {
847
+ return {
848
+ placeholder: el.placeholder || null,
849
+ readonly: el.readOnly || el.hasAttribute('readonly'),
850
+ type: el.type || null,
851
+ tagName: el.tagName.toLowerCase(),
852
+ disabled: el.disabled || false,
853
+ role: el.getAttribute('role'),
854
+ ariaHaspopup: el.getAttribute('aria-haspopup')
855
+ };
856
+ });
857
+
858
+ originalPlaceholder = elementInfo.placeholder;
859
+ isReadonly = elementInfo.readonly;
860
+ elementType = elementInfo.type;
861
+ isCombobox = elementInfo.role === 'combobox' ||
862
+ elementInfo.tagName === 'combobox' ||
863
+ elementInfo.ariaHaspopup === 'listbox';
864
+ isTextbox = elementInfo.role === 'textbox' ||
865
+ elementInfo.tagName === 'input' ||
866
+ elementInfo.tagName === 'textarea';
867
+ shouldCheckDiff = isCombobox || isTextbox;
868
+
869
+ } catch (e) {
870
+ console.log(`Warning: Failed to get element attributes: ${e}`);
871
+ }
872
+
873
+ // Get snapshot before action to record existing elements
874
+ const snapshotBefore = await (page as any)._snapshotForAI();
875
+ const existingRefs = new Set<string>();
876
+ const refPattern = /\[ref=([^\]]+)\]/g;
877
+ let match;
878
+ while ((match = refPattern.exec(snapshotBefore)) !== null) {
879
+ existingRefs.add(match[1]);
880
+ }
881
+ console.log(`Found ${existingRefs.size} total elements before action`);
882
+
883
+ // If element is readonly or a date/time input, skip fill attempt and go directly to click
884
+ if (isReadonly || ['date', 'datetime-local', 'time'].includes(elementType || '')) {
885
+ console.log(`Element ref=${ref} is readonly or date/time input, skipping direct fill attempt`);
886
+
887
+ // Click with force option to avoid scrolling
888
+ try {
889
+ await element.click({ force: true });
890
+ console.log(`Clicked readonly/special element ref=${ref} to trigger dynamic content`);
891
+ // Wait for potential dynamic content to appear
892
+ await page.waitForTimeout(500);
893
+ } catch (clickError) {
894
+ console.log(`Warning: Failed to click element: ${clickError}`);
895
+ }
896
+ } else {
897
+ // Try to fill the element, with fallback to click-then-fill strategy
898
+ let alreadyClicked = false;
899
+ try {
900
+ let fillSuccess = false;
901
+
902
+ try {
903
+ // Strategy 1: Try to fill directly without clicking (for modern inputs like Google Flights combobox)
904
+ await element.fill(text, { timeout: 3000, force: true });
905
+ fillSuccess = true;
906
+ console.log(`Filled element ref=${ref} directly without clicking`);
907
+ } catch (directFillError) {
908
+ // Strategy 2: Click first, then fill (for traditional inputs that need activation)
909
+ console.log(`Direct fill failed for ref=${ref}, trying click-then-fill strategy`);
910
+ try {
911
+ await element.click({ force: true });
912
+ alreadyClicked = true;
913
+ console.log(`Clicked element ref=${ref} before typing`);
914
+ } catch (clickError) {
915
+ console.log(`Warning: Failed to click element before typing: ${clickError}`);
916
+ }
917
+
918
+ try {
919
+ await element.fill(text, { timeout: 3000, force: true });
920
+ fillSuccess = true;
921
+ console.log(`Filled element ref=${ref} after clicking`);
922
+ } catch (secondFillError) {
923
+ // Will be handled by outer catch block below
924
+ throw secondFillError;
925
+ }
926
+ }
927
+
928
+ if (fillSuccess) {
929
+ // If this element might show dropdown, wait and check for new elements
930
+ if (shouldCheckDiff) {
931
+ await page.waitForTimeout(300);
932
+ const snapshotAfter = await (page as any)._snapshotForAI();
933
+ const diffSnapshot = this.getSnapshotDiff(snapshotBefore, snapshotAfter, ['option', 'menuitem']);
934
+
935
+ if (diffSnapshot && diffSnapshot.trim() !== '') {
936
+ return { success: true, diffSnapshot };
937
+ }
938
+ }
939
+
940
+ return { success: true };
941
+ }
942
+ } catch (fillError: any) {
943
+ // Log the error for debugging
944
+ console.log(`Fill error for ref ${ref}: ${fillError.message}`);
945
+
946
+ // Check for various error messages that indicate the element is not fillable
947
+ const errorMessage = fillError.message.toLowerCase();
948
+ if (errorMessage.includes('not an <input>') ||
949
+ errorMessage.includes('not have a role allowing') ||
950
+ errorMessage.includes('element is not') ||
951
+ errorMessage.includes('cannot type') ||
952
+ errorMessage.includes('readonly') ||
953
+ errorMessage.includes('not editable') ||
954
+ errorMessage.includes('timeout') ||
955
+ errorMessage.includes('timeouterror')) {
956
+
957
+ // Click the element again to trigger dynamic content (like date pickers), but only if we haven't clicked yet
958
+ if (!alreadyClicked) {
959
+ try {
960
+ await element.click({ force: true });
961
+ console.log(`Clicked element ref=${ref} to trigger dynamic content`);
962
+ // Wait for potential dynamic content to appear
963
+ await page.waitForTimeout(500);
964
+ } catch (clickError) {
965
+ console.log(`Warning: Failed to click element to trigger dynamic content: ${clickError}`);
966
+ }
967
+ } else {
968
+ // We already clicked during the click-then-fill strategy
969
+ await page.waitForTimeout(500);
970
+ }
971
+
972
+ // Step 1: Try to find input elements within the clicked element
973
+ const inputSelector = `input:visible, textarea:visible, [contenteditable="true"]:visible, [role="textbox"]:visible`;
974
+ const inputElement = await element.locator(inputSelector).first();
975
+
976
+ const inputExists = await inputElement.count() > 0;
977
+ if (inputExists) {
978
+ console.log(`Found input element within ref ${ref}, attempting to fill`);
979
+ try {
980
+ await inputElement.fill(text, { force: true });
981
+
982
+ // If element might show dropdown, check for new elements
983
+ if (shouldCheckDiff) {
984
+ await page.waitForTimeout(300);
985
+ const snapshotFinal = await (page as any)._snapshotForAI();
986
+ const diffSnapshot = this.getSnapshotDiff(snapshotBefore, snapshotFinal, ['option', 'menuitem']);
987
+
988
+ if (diffSnapshot && diffSnapshot.trim() !== '') {
989
+ return { success: true, diffSnapshot };
990
+ }
991
+ }
992
+
993
+ return { success: true };
994
+ } catch (innerError) {
995
+ console.log(`Failed to fill child element: ${innerError}`);
996
+ }
997
+ }
998
+
999
+ // Step 2: Look for new elements that appeared after the action
1000
+ console.log(`Looking for new elements that appeared after action...`);
1001
+
1002
+ // Get snapshot after action to find new elements
1003
+ const snapshotAfter = await (page as any)._snapshotForAI();
1004
+ const newRefs = new Set<string>();
1005
+ const afterRefPattern = /\[ref=([^\]]+)\]/g;
1006
+ let afterMatch;
1007
+ while ((afterMatch = afterRefPattern.exec(snapshotAfter)) !== null) {
1008
+ const refId = afterMatch[1];
1009
+ if (!existingRefs.has(refId)) {
1010
+ newRefs.add(refId);
1011
+ }
1012
+ }
1013
+
1014
+ console.log(`Found ${newRefs.size} new elements after action`);
1015
+
1016
+ // If we have a placeholder, try to find new input elements with that placeholder
1017
+ if (originalPlaceholder && newRefs.size > 0) {
1018
+ console.log(`Looking for new input elements with placeholder: ${originalPlaceholder}`);
1019
+
1020
+ // Try each new ref to see if it's an input with our placeholder
1021
+ for (const newRef of newRefs) {
1022
+ try {
1023
+ const newElement = await page.locator(`aria-ref=${newRef}`).first();
1024
+ const tagName = await newElement.evaluate(el => el.tagName.toLowerCase()).catch(() => null);
1025
+
1026
+ if (tagName === 'input' || tagName === 'textarea') {
1027
+ const placeholder = await newElement.getAttribute('placeholder').catch(() => null);
1028
+ if (placeholder === originalPlaceholder) {
1029
+ console.log(`Found new input element with matching placeholder: ref=${newRef}`);
1030
+
1031
+ // Check if it's visible and fillable
1032
+ const elementInfo = await newElement.evaluate((el: any) => {
1033
+ return {
1034
+ tagName: el.tagName,
1035
+ id: el.id,
1036
+ className: el.className,
1037
+ placeholder: el.placeholder,
1038
+ isVisible: el.offsetParent !== null,
1039
+ isReadonly: el.readOnly || el.getAttribute('readonly') !== null
1040
+ };
1041
+ });
1042
+ console.log(`New element details:`, JSON.stringify(elementInfo));
1043
+
1044
+ // Try to fill it with force to avoid scrolling
1045
+ await newElement.fill(text, { force: true });
1046
+
1047
+ // If element might show dropdown, check for new elements
1048
+ if (shouldCheckDiff) {
1049
+ await page.waitForTimeout(300);
1050
+ const snapshotFinal = await (page as any)._snapshotForAI();
1051
+ const diffSnapshot = this.getSnapshotDiff(snapshotBefore, snapshotFinal, ['option', 'menuitem']);
1052
+
1053
+ if (diffSnapshot && diffSnapshot.trim() !== '') {
1054
+ return { success: true, diffSnapshot };
1055
+ }
1056
+ }
1057
+
1058
+ return { success: true };
1059
+ }
1060
+ }
1061
+ } catch (e) {
1062
+ // Ignore errors for non-input elements
1063
+ }
1064
+ }
1065
+ }
1066
+
1067
+ console.log(`No suitable input element found for ref ${ref}`);
1068
+ }
1069
+ // Re-throw the original error if we couldn't find an input element
1070
+ throw fillError;
1071
+ }
1072
+ }
1073
+
1074
+ // If we skipped the fill attempt (readonly elements), look for new elements directly
1075
+ if (isReadonly || ['date', 'datetime-local', 'time'].includes(elementType || '')) {
1076
+ // Look for new elements that appeared after clicking
1077
+ console.log(`Looking for new elements that appeared after clicking readonly element...`);
1078
+
1079
+ // Get snapshot after action to find new elements
1080
+ const snapshotAfter = await (page as any)._snapshotForAI();
1081
+ const newRefs = new Set<string>();
1082
+ const afterRefPattern = /\[ref=([^\]]+)\]/g;
1083
+ let afterMatch;
1084
+ while ((afterMatch = afterRefPattern.exec(snapshotAfter)) !== null) {
1085
+ const refId = afterMatch[1];
1086
+ if (!existingRefs.has(refId)) {
1087
+ newRefs.add(refId);
1088
+ }
1089
+ }
1090
+
1091
+ console.log(`Found ${newRefs.size} new elements after clicking readonly element`);
1092
+
1093
+ // If we have a placeholder, try to find new input elements with that placeholder
1094
+ if (originalPlaceholder && newRefs.size > 0) {
1095
+ console.log(`Looking for new input elements with placeholder: ${originalPlaceholder}`);
1096
+
1097
+ // Try each new ref to see if it's an input with our placeholder
1098
+ for (const newRef of newRefs) {
1099
+ try {
1100
+ const newElement = await page.locator(`aria-ref=${newRef}`).first();
1101
+ const tagName = await newElement.evaluate(el => el.tagName.toLowerCase()).catch(() => null);
1102
+
1103
+ if (tagName === 'input' || tagName === 'textarea') {
1104
+ const placeholder = await newElement.getAttribute('placeholder').catch(() => null);
1105
+ if (placeholder === originalPlaceholder) {
1106
+ console.log(`Found new input element with matching placeholder: ref=${newRef}`);
1107
+
1108
+ // Check if it's visible and fillable
1109
+ const elementInfo = await newElement.evaluate((el: any) => {
1110
+ return {
1111
+ tagName: el.tagName,
1112
+ id: el.id,
1113
+ className: el.className,
1114
+ placeholder: el.placeholder,
1115
+ isVisible: el.offsetParent !== null,
1116
+ isReadonly: el.readOnly || el.getAttribute('readonly') !== null
1117
+ };
1118
+ });
1119
+ console.log(`New element details:`, JSON.stringify(elementInfo));
1120
+
1121
+ // Try to fill it with force to avoid scrolling
1122
+ await newElement.fill(text, { force: true });
1123
+
1124
+ // If element might show dropdown, check for new elements
1125
+ if (shouldCheckDiff) {
1126
+ await page.waitForTimeout(300);
1127
+ const snapshotFinal = await (page as any)._snapshotForAI();
1128
+ const diffSnapshot = this.getSnapshotDiff(snapshotBefore, snapshotFinal, ['option', 'menuitem']);
1129
+
1130
+ if (diffSnapshot && diffSnapshot.trim() !== '') {
1131
+ return { success: true, diffSnapshot };
1132
+ }
1133
+ }
1134
+
1135
+ return { success: true };
1136
+ }
1137
+ }
1138
+ } catch (e) {
1139
+ // Ignore errors for non-input elements
1140
+ }
1141
+ }
1142
+ }
1143
+
1144
+ console.log(`No suitable input element found for readonly ref ${ref}`);
1145
+ return { success: false, error: `Element ref=${ref} is readonly and no suitable input was found` };
1146
+ }
1147
+ }
1148
+
1149
+ return { success: false, error: 'No valid input provided' };
1150
+ } catch (error) {
1151
+ return { success: false, error: `Type failed: ${error}` };
1152
+ }
1153
+ }
1154
+
1155
/**
 * Select `value` in the element addressed by an aria-ref ID.
 *
 * A fresh AI snapshot is taken before the ref is resolved (presumably the
 * snapshot is what (re)assigns aria-ref IDs — confirm against Playwright
 * internals). The first locator match is used and the choice is delegated to
 * Playwright's `selectOption`.
 *
 * @param page  Page that owns the element.
 * @param ref   aria-ref ID of the target element.
 * @param value Option value handed to `selectOption`.
 * @returns `{ success: true }`, or `{ success: false, error }` when the element
 *          is missing or Playwright throws. This method itself never throws.
 */
private async performSelect(page: Page, ref: string, value: string): Promise<{ success: boolean; error?: string }> {
  try {
    await (page as any)._snapshotForAI();

    const target = page.locator(`aria-ref=${ref}`).first();
    const matchCount = await target.count();
    if (matchCount === 0) {
      return { success: false, error: `Element with ref ${ref} not found` };
    }

    await target.selectOption(value);
    return { success: true };
  } catch (failure) {
    return { success: false, error: `Select failed: ${failure}` };
  }
}
1180
+
1181
/**
 * Perform a raw mouse action at viewport coordinates.
 *
 * @param page    Page to act on.
 * @param control One of `'click'`, `'right_click'` or `'dblclick'`.
 * @param x       Horizontal viewport coordinate.
 * @param y       Vertical viewport coordinate.
 * @returns `{ success: true }`, or `{ success: false, error }` when the viewport
 *          is unavailable, the coordinates are not usable numbers inside the
 *          viewport, the control name is unknown, or Playwright throws.
 */
private async performMouseControl(page: Page, control: string, x: number, y: number): Promise<{ success: boolean; error?: string }> {
  try {
    const viewport = page.viewportSize();
    if (!viewport) {
      return { success: false, error: 'Viewport size not available from page.' };
    }

    // NaN (or a missing coordinate) makes every `<` / `>` comparison false, so it
    // would slip through the range check below; reject it explicitly.
    const notNumeric = Number.isNaN(Number(x)) || Number.isNaN(Number(y));
    if (notNumeric || x < 0 || y < 0 || x > viewport.width || y > viewport.height) {
      return { success: false, error: `Invalid coordinates, outside viewport bounds: (${x}, ${y})` };
    }

    switch (control) {
      case 'click': {
        await page.mouse.click(x, y);
        break;
      }
      case 'right_click': {
        await page.mouse.click(x, y, { button: 'right' });
        break;
      }
      case 'dblclick': {
        await page.mouse.dblclick(x, y);
        break;
      }
      default:
        return { success: false, error: `Invalid control action: ${control}` };
    }

    return { success: true };
  } catch (error) {
    return { success: false, error: `Mouse action failed: ${error}` };
  }
}
1215
+
1216
/**
 * Drag with the mouse from the centre of one element to the centre of another,
 * both addressed by aria-ref IDs.
 *
 * @param page    Page that owns both elements.
 * @param fromRef aria-ref ID of the element where the drag starts.
 * @param toRef   aria-ref ID of the element where the drag ends.
 * @returns `{ success: true }`, or `{ success: false, error }` when either
 *          element is missing, has no bounding box, or Playwright throws.
 */
private async performMouseDrag(page: Page, fromRef: string, toRef: string): Promise<{ success: boolean; error?: string }> {
  try {
    // Take a fresh snapshot before resolving the refs.
    await (page as any)._snapshotForAI();

    const fromElement = page.locator(`aria-ref=${fromRef}`).first();
    const toElement = page.locator(`aria-ref=${toRef}`).first();

    const fromExists = await fromElement.count() > 0;
    const toExists = await toElement.count() > 0;

    if (!fromExists) {
      return { success: false, error: `Source element with ref ${fromRef} not found` };
    }

    if (!toExists) {
      return { success: false, error: `Target element with ref ${toRef} not found` };
    }

    // boundingBox() resolves to null when no box is available for the element.
    const fromBox = await fromElement.boundingBox();
    const toBox = await toElement.boundingBox();

    if (!fromBox) {
      return { success: false, error: `Could not get bounding box for source element with ref ${fromRef}` };
    }

    if (!toBox) {
      return { success: false, error: `Could not get bounding box for target element with ref ${toRef}` };
    }

    const fromX = fromBox.x + fromBox.width / 2;
    const fromY = fromBox.y + fromBox.height / 2;
    const toX = toBox.x + toBox.width / 2;
    const toY = toBox.y + toBox.height / 2;

    await page.mouse.move(fromX, fromY);
    await page.mouse.down();
    try {
      await page.mouse.move(toX, toY);
    } finally {
      // Always release the button: if the move fails, a button left pressed
      // would corrupt every later mouse action on this page.
      await page.mouse.up();
    }

    return { success: true };
  } catch (error) {
    return { success: false, error: `Mouse drag action failed: ${error}` };
  }
}
1272
+
1273
/**
 * Run one browser action on the current page and report the outcome with timings.
 *
 * Dispatches on `action.type` (`click`, `type`, `select`, `scroll`, `enter`,
 * `mouse_control`, `mouse_drag`, `press_key`), then waits for page stability.
 * Any failure inside the dispatch is converted into a `success: false` result;
 * only a failure to obtain the current page propagates as a rejection.
 *
 * @param action Action descriptor; its `type` selects the handler.
 * @returns Result carrying `success`, `message`, `timing`, and — when produced —
 *          `newTabId` and `details`.
 */
async executeAction(action: BrowserAction): Promise<ActionResult> {
  const invokedAt = Date.now();
  const page = await this.getCurrentPage();

  // Declared outside the try so the failure result can still report them.
  let elementSearchTime = 0;
  let actionExecutionTime = 0;
  let stabilityWaitTime = 0;

  try {
    const lookupStartedAt = Date.now();

    let newTabId: string | undefined;
    let customMessage: string | undefined;
    let actionDetails: Record<string, any> | undefined;

    switch (action.type) {
      case 'click': {
        elementSearchTime = Date.now() - lookupStartedAt;
        const began = Date.now();

        const outcome = await this.performClick(page, action.ref);
        if (!outcome.success) {
          throw new Error(`Click failed: ${outcome.error}`);
        }

        newTabId = outcome.newTabId;
        if (outcome.diffSnapshot) {
          actionDetails = { diffSnapshot: outcome.diffSnapshot };
        }

        actionExecutionTime = Date.now() - began;
        break;
      }

      case 'type': {
        elementSearchTime = Date.now() - lookupStartedAt;
        const began = Date.now();

        const outcome = await this.performType(page, action.ref, action.text, action.inputs);
        if (!outcome.success) {
          throw new Error(`Type failed: ${outcome.error}`);
        }

        // Per-input details are only present when several inputs were filled.
        if (outcome.details) {
          const perInput = Object.values(outcome.details);
          const succeeded = perInput.filter((entry: any) => entry.success).length;
          customMessage = `Typed text into ${succeeded}/${perInput.length} elements`;
          actionDetails = outcome.details;
        }

        if (outcome.diffSnapshot) {
          actionDetails = actionDetails || {};
          actionDetails.diffSnapshot = outcome.diffSnapshot;
        }

        actionExecutionTime = Date.now() - began;
        break;
      }

      case 'select': {
        elementSearchTime = Date.now() - lookupStartedAt;
        const began = Date.now();

        const outcome = await this.performSelect(page, action.ref, action.value);
        if (!outcome.success) {
          throw new Error(`Select failed: ${outcome.error}`);
        }

        actionExecutionTime = Date.now() - began;
        break;
      }

      case 'scroll': {
        elementSearchTime = Date.now() - lookupStartedAt;
        const began = Date.now();

        // Scrolling up is a negative vertical offset.
        const verticalDelta = action.direction === 'up' ? -action.amount : action.amount;
        await page.evaluate((dy: number) => {
          window.scrollBy(0, dy);
        }, verticalDelta);
        await this.getCurrentScrollPosition();

        actionExecutionTime = Date.now() - began;
        break;
      }

      case 'enter': {
        elementSearchTime = Date.now() - lookupStartedAt;
        const began = Date.now();

        const { enterKey } = this.configLoader.getBrowserConfig();
        await page.keyboard.press(enterKey);

        actionExecutionTime = Date.now() - began;
        break;
      }

      case 'mouse_control': {
        elementSearchTime = Date.now() - lookupStartedAt;
        const began = Date.now();

        const outcome = await this.performMouseControl(page, action.control, action.x, action.y);
        if (!outcome.success) {
          throw new Error(`Action failed: ${outcome.error}`);
        }

        actionExecutionTime = Date.now() - began;
        break;
      }

      case 'mouse_drag': {
        elementSearchTime = Date.now() - lookupStartedAt;
        const began = Date.now();

        const outcome = await this.performMouseDrag(page, action.from_ref, action.to_ref);
        if (!outcome.success) {
          throw new Error(`Action failed: ${outcome.error}`);
        }

        actionExecutionTime = Date.now() - began;
        break;
      }

      case 'press_key': {
        elementSearchTime = Date.now() - lookupStartedAt;
        const began = Date.now();

        // Keys are joined with '+' to form a single key combination.
        await page.keyboard.press(action.keys.join('+'));

        actionExecutionTime = Date.now() - began;
        break;
      }

      default:
        throw new Error(`Unknown action type: ${(action as any).type}`);
    }

    const stabilityBegan = Date.now();
    const stability = await this.waitForPageStability(page);
    stabilityWaitTime = Date.now() - stabilityBegan;

    const elapsedTotal = Date.now() - invokedAt;

    return {
      success: true,
      message: customMessage || `Action ${action.type} executed successfully`,
      timing: {
        total_time_ms: elapsedTotal,
        element_search_time_ms: elementSearchTime,
        action_execution_time_ms: actionExecutionTime,
        stability_wait_time_ms: stabilityWaitTime,
        dom_content_loaded_time_ms: stability.domContentLoadedTime,
        network_idle_time_ms: stability.networkIdleTime,
      },
      // Optional fields are only added when they carry a value.
      ...(newTabId && { newTabId }),
      ...(actionDetails && { details: actionDetails }),
    };
  } catch (error) {
    const elapsedTotal = Date.now() - invokedAt;
    return {
      success: false,
      message: `Action ${action.type} failed: ${error}`,
      timing: {
        total_time_ms: elapsedTotal,
        element_search_time_ms: elementSearchTime,
        action_execution_time_ms: actionExecutionTime,
        stability_wait_time_ms: stabilityWaitTime,
      },
    };
  }
}
1453
+
1454
/**
 * Wait until the DOM has been free of mutations for 100 ms, or until
 * `maxWaitTime` elapses, whichever comes first.
 *
 * A MutationObserver is installed in the page to timestamp every change; it is
 * always disconnected and its window-level bookkeeping removed afterwards.
 * Reaching the time limit is not an error.
 *
 * @param page        Page to observe.
 * @param maxWaitTime Upper bound for the wait in milliseconds (default 500).
 *                    A non-positive or NaN value returns immediately.
 */
private async waitForDOMStability(page: Page, maxWaitTime: number = 500): Promise<void> {
  // Playwright interprets `timeout: 0` as "no timeout", so an exhausted budget
  // must short-circuit here instead of being clamped to 0 and waiting forever.
  if (!(maxWaitTime > 0)) {
    return;
  }

  const stabilityThreshold = 100; // Consider stable if no changes for 100ms

  try {
    // Install the in-page mutation tracker.
    await page.evaluate(() => {
      (window as any).__domStabilityCheck = { changeCount: 0, lastChange: Date.now() };

      const observer = new MutationObserver(() => {
        (window as any).__domStabilityCheck.changeCount++;
        (window as any).__domStabilityCheck.lastChange = Date.now();
      });

      // document.body can be null very early in a page's life; fall back to the root element.
      observer.observe(document.body || document.documentElement, {
        childList: true,
        subtree: true,
        attributes: true,
        characterData: true
      });

      (window as any).__domStabilityObserver = observer;
    });

    // Resolve once the last recorded change is older than the threshold;
    // a timeout is swallowed because it only means "still changing".
    await page.waitForFunction(
      (threshold) => {
        const check = (window as any).__domStabilityCheck;
        return check && (Date.now() - check.lastChange) > threshold;
      },
      stabilityThreshold,
      { timeout: maxWaitTime }
    ).catch(() => {});
  } finally {
    // Best-effort cleanup; the page may already have navigated or closed.
    await page.evaluate(() => {
      const observer = (window as any).__domStabilityObserver;
      if (observer) observer.disconnect();
      delete (window as any).__domStabilityObserver;
      delete (window as any).__domStabilityCheck;
    }).catch(() => {});
  }
}
1502
+
1503
/**
 * Wait for the configured DOM-content-loaded and network-idle load states, in
 * that order, and report how long each wait took.
 *
 * If a wait throws (for example on timeout), the remaining steps are skipped
 * and any duration not yet measured stays at 0; the error is not propagated.
 *
 * @param page Page whose load states are awaited.
 * @returns Milliseconds spent in each of the two waits.
 */
private async waitForPageStability(page: Page): Promise<{ domContentLoadedTime: number; networkIdleTime: number }> {
  const elapsed = { domContentLoadedTime: 0, networkIdleTime: 0 };

  try {
    const domPhaseBegan = Date.now();
    const config = this.configLoader.getBrowserConfig();
    await page.waitForLoadState(config.domContentLoadedState as any, { timeout: config.pageStabilityTimeout });
    elapsed.domContentLoadedTime = Date.now() - domPhaseBegan;

    const networkPhaseBegan = Date.now();
    await page.waitForLoadState(config.networkIdleState as any, { timeout: config.networkIdleTimeout });
    elapsed.networkIdleTime = Date.now() - networkPhaseBegan;
  } catch (error) {
    // A failed stability wait is tolerated; callers still get partial timings.
  }

  return elapsed;
}
1522
+
1523
/**
 * Navigate to `url`, reusing the current tab when it is blank (or when nothing
 * has been navigated yet) and opening a new tab otherwise.
 *
 * In CDP mode an untracked blank tab of the existing context is reused when
 * one is available; otherwise a new page is created. A newly used tab is
 * registered, made current and brought to the front.
 *
 * @param url Address to load.
 * @returns Result with `success`, `message` and `timing`, plus `newTabId` when
 *          a new tab was used. Errors are reported as `success: false` and are
 *          never thrown.
 */
async visitPage(url: string): Promise<ActionResult & { newTabId?: string }> {
  const startTime = Date.now();

  try {
    let currentPage: Page;
    let currentUrl: string;

    try {
      currentPage = await this.getCurrentPage();
      currentUrl = currentPage.url();
    } catch (error: any) {
      console.log('[visitPage] Failed to get current page:', error);
      throw new Error(`No active page available: ${error?.message || error}`);
    }

    // Fetched once and shared by both navigation paths below.
    const browserConfig = this.configLoader.getBrowserConfig();
    const gotoOptions = {
      timeout: browserConfig.navigationTimeout,
      waitUntil: browserConfig.domContentLoadedState as any
    };

    const isBlankPage = this.isBlankPageUrl(currentUrl) || currentUrl === browserConfig.defaultStartUrl;
    const shouldUseCurrentTab = isBlankPage || !this.hasNavigatedBefore;

    if (shouldUseCurrentTab) {
      const navigationStart = Date.now();
      await currentPage.goto(url, gotoOptions);

      // A fresh document starts at the top-left corner.
      this.scrollPosition = { x: 0, y: 0 };
      this.hasNavigatedBefore = true;

      const navigationTime = Date.now() - navigationStart;
      const stabilityResult = await this.waitForPageStability(currentPage);
      const totalTime = Date.now() - startTime;

      return {
        success: true,
        message: `Navigated to ${url}`,
        timing: {
          total_time_ms: totalTime,
          navigation_time_ms: navigationTime,
          dom_content_loaded_time_ms: stabilityResult.domContentLoadedTime,
          network_idle_time_ms: stabilityResult.networkIdleTime,
        },
      };
    }

    // The current page has content: load the URL in another tab.
    const context = this.context;
    if (!context) {
      throw new Error('Browser context not initialized');
    }

    const navigationStart = Date.now();
    let newPage: Page | null = null;

    if (browserConfig.cdpUrl) {
      // CDP mode: prefer an existing blank tab that is not tracked yet.
      // The tracked set is built once instead of once per candidate page.
      const trackedPages = new Set(this.pages.values());
      newPage = context.pages().find(
        candidate => !trackedPages.has(candidate) && this.isBlankPageUrl(candidate.url())
      ) ?? null;

      if (!newPage) {
        console.log('[CDP] No available blank tabs, creating new page');
      }
    }

    if (!newPage) {
      newPage = await context.newPage();
    }

    const newTabId = this.generateTabId();
    this.registerNewPage(newTabId, newPage);

    newPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
    newPage.setDefaultTimeout(browserConfig.navigationTimeout);

    await newPage.goto(url, gotoOptions);

    // The tab only becomes current after the navigation succeeded.
    this.currentTabId = newTabId;
    await newPage.bringToFront();

    this.scrollPosition = { x: 0, y: 0 };
    this.hasNavigatedBefore = true;

    const navigationTime = Date.now() - navigationStart;
    const stabilityResult = await this.waitForPageStability(newPage);
    const totalTime = Date.now() - startTime;

    return {
      success: true,
      message: `Opened ${url} in new tab`,
      newTabId: newTabId,
      timing: {
        total_time_ms: totalTime,
        navigation_time_ms: navigationTime,
        dom_content_loaded_time_ms: stabilityResult.domContentLoadedTime,
        network_idle_time_ms: stabilityResult.networkIdleTime,
      },
    };
  } catch (error) {
    const totalTime = Date.now() - startTime;
    return {
      success: false,
      message: `Navigation to ${url} failed: ${error}`,
      timing: {
        total_time_ms: totalTime,
        navigation_time_ms: 0,
        dom_content_loaded_time_ms: 0,
        network_idle_time_ms: 0,
      },
    };
  }
}
1670
+
1671
/**
 * Make the tab identified by `tabId` the current tab.
 *
 * The internal current-tab pointer is updated first. Focus is then requested
 * inside the page and, when not headless, `bringToFront` is scheduled in the
 * background after the configured navigation delay so it does not block the
 * caller. Failures of these focus steps do not fail the switch.
 *
 * @param tabId ID of a tracked tab.
 * @returns `false` when the tab is unknown or already closed (a closed tab is
 *          also dropped from tracking), `true` once the switch is recorded.
 */
async switchToTab(tabId: string): Promise<boolean> {
  const page = this.pages.get(tabId);
  if (!page) {
    return false;
  }

  if (page.isClosed()) {
    this.pages.delete(tabId);
    return false;
  }

  // Runs detached from the caller: wait briefly, then raise the tab.
  const raiseTabLater = async (): Promise<void> => {
    const browserConfig = this.configLoader.getBrowserConfig();
    await new Promise(resolve => setTimeout(resolve, browserConfig.navigationDelay));
    try {
      await page.bringToFront();
    } catch (e) {
      // The tab is already current internally, so this is only informational.
      console.debug(`bringToFront failed for ${tabId}, but tab is switched internally`);
    }
  };

  try {
    console.log(`Switching to tab ${tabId}`);

    this.currentTabId = tabId;

    try {
      // Ask the page itself for focus; errors here are ignored.
      await page.evaluate(() => {
        window.focus();
        window.dispatchEvent(new Event('focus'));
      }).catch(() => {});

      // Deliberately not awaited, so the caller is not blocked by the activation.
      if (!this.configLoader.getBrowserConfig().headless) {
        Promise.resolve().then(raiseTabLater);
      }
    } catch (error) {
      console.warn(`Tab focus warning for ${tabId}:`, error);
    }

    console.log(`Successfully switched to tab ${tabId}`);
    return true;
  } catch (error) {
    console.error(`Error switching to tab ${tabId}:`, error);
    return false;
  }
}
1728
+
1729
/**
 * Close a tracked tab and stop tracking it.
 *
 * When the closed tab was the current one, the first remaining tracked tab
 * becomes current, or the current tab is cleared if none remain.
 *
 * @param tabId ID of the tab to close.
 * @returns `false` when the tab is not tracked, `true` otherwise.
 */
async closeTab(tabId: string): Promise<boolean> {
  const page = this.pages.get(tabId);
  if (!page) {
    return false;
  }

  if (!page.isClosed()) {
    await page.close();
  }
  this.pages.delete(tabId);

  if (tabId !== this.currentTabId) {
    return true;
  }

  // The current tab is gone: fall back to the first tab still tracked, if any.
  const survivors = Array.from(this.pages.keys());
  this.currentTabId = survivors.length > 0 ? survivors[0] : null;
  return true;
}
1753
+
1754
/**
 * Execute a sequence of keyboard operations on the current page.
 *
 * Supported operation types:
 *  - `press`: presses `keys` joined with '+' as one combination (skipped without `keys`);
 *  - `type`:  types `text` with a per-character delay capped at 1000 ms (skipped without `text`);
 *  - `wait`:  pauses for `delay` ms, capped at 10000 ms.
 * Operations with any other type are ignored.
 *
 * @param operations        At most 100 operations, run in order.
 * @param skipStabilityWait When true, only a fixed 50 ms pause follows the batch
 *                          instead of waiting for the configured load state.
 * @returns Object with `success`, `message` and `timing`. Failures inside the
 *          batch yield `success: false`; only a failure to obtain the current
 *          page rejects.
 */
async batchKeyboardInput(operations: Array<{type: string, keys?: string[], text?: string, delay?: number}>, skipStabilityWait: boolean = false): Promise<any> {
  const startTime = Date.now();
  const page = await this.getCurrentPage();

  try {
    const maxOperations = 100; // Prevent excessive number of operations per batch
    // Report the two invalid-input cases separately so the message matches the cause.
    if (!Array.isArray(operations)) {
      throw new Error('Batch operations must be provided as an array');
    }
    if (operations.length > maxOperations) {
      throw new Error(`Too many operations in batch (max ${maxOperations} allowed)`);
    }

    const executionStart = Date.now();

    for (const op of operations) {
      switch (op.type) {
        case 'press':
          if (op.keys) {
            const keys = op.keys.join('+');
            await page.keyboard.press(keys);
          }
          break;
        case 'type':
          if (op.text) {
            // Limit delay to prevent resource exhaustion attacks
            const maxTypeDelay = 1000; // 1 second per character max
            let delayValue = Number(op.delay);
            if (!isFinite(delayValue) || delayValue < 0) delayValue = 0;
            const safeTypeDelay = Math.min(delayValue, maxTypeDelay);
            await page.keyboard.type(op.text, { delay: safeTypeDelay });
          }
          break;
        case 'wait':
          // Only apply wait if op.delay is a non-negative finite number
          // Limit to prevent resource exhaustion (CodeQL js/resource-exhaustion)
          {
            const MAX_WAIT_DELAY = 10000; // 10 seconds maximum
            let delayValue = Number(op.delay);
            if (!isFinite(delayValue) || delayValue < 0) {
              delayValue = 0;
            }
            // Clamp delay to safe range [0, MAX_WAIT_DELAY]
            const safeDelay = delayValue > MAX_WAIT_DELAY ? MAX_WAIT_DELAY : delayValue;
            // lgtm[js/resource-exhaustion]
            // Safe: delay is clamped to MAX_WAIT_DELAY (10 seconds)
            await new Promise(resolve => setTimeout(resolve, safeDelay));
          }
          break;
      }
    }

    const executionTime = Date.now() - executionStart;
    let stabilityTime = 0;

    if (!skipStabilityWait) {
      const stabilityStart = Date.now();

      try {
        const browserConfig = this.configLoader.getBrowserConfig();
        await page.waitForLoadState(browserConfig.domContentLoadedState as any, { timeout: browserConfig.pageStabilityTimeout });
      } catch (error) {
        // Best effort: a load-state timeout must not fail an already executed batch.
      }

      await new Promise(resolve => setTimeout(resolve, 50));
      stabilityTime = Date.now() - stabilityStart;
    } else {
      await new Promise(resolve => setTimeout(resolve, 50));
      stabilityTime = 50;
    }

    const totalTime = Date.now() - startTime;

    return {
      success: true,
      message: `Batch keyboard input completed (${operations.length} operations)`,
      timing: {
        total_time_ms: totalTime,
        execution_time_ms: executionTime,
        stability_wait_time_ms: stabilityTime,
        operations_count: operations.length,
        skipped_stability: skipStabilityWait,
      },
    };
  } catch (error) {
    const totalTime = Date.now() - startTime;
    return {
      success: false,
      message: `Batch keyboard input failed: ${error}`,
      timing: {
        total_time_ms: totalTime,
      },
    };
  }
}
1848
+
1849
/**
 * Describe every tracked tab that is still open.
 *
 * Closed tabs, and tabs whose title cannot be read, are left out.
 *
 * @returns One entry per accessible tab with its ID, title, URL and whether it
 *          is the current tab.
 */
async getTabInfo(): Promise<TabInfo[]> {
  const openTabs: TabInfo[] = [];

  for (const [tabId, page] of this.pages) {
    if (page.isClosed()) {
      continue;
    }

    try {
      const title = await page.title();
      openTabs.push({
        tab_id: tabId,
        title,
        url: page.url(),
        is_current: tabId === this.currentTabId,
      });
    } catch (error) {
      // Inaccessible tab: omit it rather than fail the whole listing.
    }
  }

  return openTabs;
}
1872
+
1873
/**
 * Capture a screenshot of the current page using the configured timeout and
 * full-page setting.
 *
 * @returns The image buffer and the elapsed time in milliseconds, measured
 *          from before the current page is looked up.
 */
async takeScreenshot(): Promise<{ buffer: Buffer; timing: { screenshot_time_ms: number } }> {
  const began = Date.now();
  const page = await this.getCurrentPage();

  const { screenshotTimeout, fullPageScreenshot } = this.configLoader.getBrowserConfig();
  const buffer = await page.screenshot({
    timeout: screenshotTimeout,
    fullPage: fullPageScreenshot
  });

  return {
    buffer,
    timing: { screenshot_time_ms: Date.now() - began },
  };
}
1892
+
1893
/**
 * Shut the session down: close every tracked tab, then the owned browser
 * context, then the browser connection.
 *
 * Teardown is resilient: a tab that fails to close is logged and skipped, and
 * the browser is still closed when closing the context throws. A context is
 * only closed when this instance owns it; in CDP mode that happens only
 * alongside an existing browser connection and its errors are ignored.
 * Errors from closing the context in non-CDP mode, or from closing the
 * browser, still propagate after the internal references have been cleared.
 */
async close(): Promise<void> {
  const browserConfig = this.configLoader.getBrowserConfig();
  const isCdpMode = Boolean(browserConfig.cdpUrl);

  for (const [tabId, page] of this.pages) {
    if (page.isClosed()) {
      continue;
    }
    try {
      await page.close();
    } catch (error) {
      // One stubborn tab must not prevent the context and browser from being released.
      console.warn(`Failed to close tab ${tabId} during shutdown:`, error);
    }
  }

  this.pages.clear();
  this.currentTabId = null;

  // Snapshot what has to be released so state can be cleared before awaiting.
  const ownedContext = this.contextOwnedByUs ? this.context : null;
  const browser = this.browser;

  try {
    // Non-CDP: always close an owned context. CDP: only alongside a browser connection.
    if (ownedContext && (!isCdpMode || browser)) {
      this.context = null;
      this.contextOwnedByUs = false;
      const closing = ownedContext.close();
      // In CDP mode the context close is best-effort before disconnecting.
      await (isCdpMode ? closing.catch(() => {}) : closing);
    }
  } finally {
    if (browser) {
      this.browser = null;
      // The original code annotates this call as a disconnect in CDP mode and a full shutdown for local launches.
      await browser.close();
    }
  }
}
1929
+ }