camel-ai 0.2.59__py3-none-any.whl → 0.2.82__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (506)
  1. camel/__init__.py +3 -3
  2. camel/agents/__init__.py +2 -2
  3. camel/agents/_types.py +9 -4
  4. camel/agents/_utils.py +40 -2
  5. camel/agents/base.py +2 -2
  6. camel/agents/chat_agent.py +5012 -902
  7. camel/agents/critic_agent.py +2 -2
  8. camel/agents/deductive_reasoner_agent.py +56 -56
  9. camel/agents/embodied_agent.py +2 -2
  10. camel/agents/knowledge_graph_agent.py +20 -20
  11. camel/agents/mcp_agent.py +39 -36
  12. camel/agents/multi_hop_generator_agent.py +3 -3
  13. camel/agents/programmed_agent_instruction.py +2 -2
  14. camel/agents/repo_agent.py +4 -3
  15. camel/agents/role_assignment_agent.py +2 -2
  16. camel/agents/search_agent.py +2 -2
  17. camel/agents/task_agent.py +2 -2
  18. camel/agents/tool_agents/__init__.py +2 -2
  19. camel/agents/tool_agents/base.py +2 -2
  20. camel/agents/tool_agents/hugging_face_tool_agent.py +3 -3
  21. camel/benchmarks/__init__.py +2 -2
  22. camel/benchmarks/apibank.py +5 -5
  23. camel/benchmarks/apibench.py +2 -2
  24. camel/benchmarks/base.py +2 -2
  25. camel/benchmarks/browsecomp.py +44 -33
  26. camel/benchmarks/gaia.py +17 -13
  27. camel/benchmarks/mock_website/README.md +94 -0
  28. camel/benchmarks/mock_website/mock_web.py +299 -0
  29. camel/benchmarks/mock_website/requirements.txt +3 -0
  30. camel/benchmarks/mock_website/shopping_mall/app.py +465 -0
  31. camel/benchmarks/mock_website/task.json +104 -0
  32. camel/benchmarks/nexus.py +3 -3
  33. camel/benchmarks/ragbench.py +2 -2
  34. camel/bots/__init__.py +2 -2
  35. camel/bots/discord/__init__.py +2 -2
  36. camel/bots/discord/discord_app.py +2 -2
  37. camel/bots/discord/discord_installation.py +2 -2
  38. camel/bots/discord/discord_store.py +3 -3
  39. camel/bots/slack/__init__.py +2 -2
  40. camel/bots/slack/models.py +4 -4
  41. camel/bots/slack/slack_app.py +2 -2
  42. camel/bots/telegram_bot.py +2 -2
  43. camel/configs/__init__.py +26 -2
  44. camel/configs/aihubmix_config.py +90 -0
  45. camel/configs/aiml_config.py +2 -2
  46. camel/configs/amd_config.py +70 -0
  47. camel/configs/anthropic_config.py +8 -7
  48. camel/configs/base_config.py +2 -2
  49. camel/configs/bedrock_config.py +5 -3
  50. camel/configs/cerebras_config.py +98 -0
  51. camel/configs/cohere_config.py +3 -3
  52. camel/configs/cometapi_config.py +106 -0
  53. camel/configs/crynux_config.py +94 -0
  54. camel/configs/deepseek_config.py +9 -8
  55. camel/configs/gemini_config.py +6 -4
  56. camel/configs/groq_config.py +6 -4
  57. camel/configs/internlm_config.py +6 -4
  58. camel/configs/litellm_config.py +2 -2
  59. camel/configs/lmstudio_config.py +6 -4
  60. camel/configs/minimax_config.py +95 -0
  61. camel/configs/mistral_config.py +3 -3
  62. camel/configs/modelscope_config.py +5 -3
  63. camel/configs/moonshot_config.py +2 -2
  64. camel/configs/nebius_config.py +105 -0
  65. camel/configs/netmind_config.py +2 -2
  66. camel/configs/novita_config.py +2 -2
  67. camel/configs/nvidia_config.py +2 -2
  68. camel/configs/ollama_config.py +2 -2
  69. camel/configs/openai_config.py +8 -3
  70. camel/configs/openrouter_config.py +6 -4
  71. camel/configs/ppio_config.py +2 -2
  72. camel/configs/qianfan_config.py +85 -0
  73. camel/configs/qwen_config.py +2 -2
  74. camel/configs/reka_config.py +3 -3
  75. camel/configs/samba_config.py +8 -6
  76. camel/configs/sglang_config.py +2 -2
  77. camel/configs/siliconflow_config.py +2 -2
  78. camel/configs/togetherai_config.py +2 -2
  79. camel/configs/vllm_config.py +4 -2
  80. camel/configs/watsonx_config.py +2 -2
  81. camel/configs/yi_config.py +6 -4
  82. camel/configs/zhipuai_config.py +6 -4
  83. camel/{data_collector → data_collectors}/__init__.py +2 -2
  84. camel/{data_collector → data_collectors}/alpaca_collector.py +19 -10
  85. camel/{data_collector → data_collectors}/base.py +2 -2
  86. camel/{data_collector → data_collectors}/sharegpt_collector.py +3 -3
  87. camel/datagen/__init__.py +2 -2
  88. camel/datagen/cot_datagen.py +32 -37
  89. camel/datagen/evol_instruct/__init__.py +2 -2
  90. camel/datagen/evol_instruct/evol_instruct.py +2 -2
  91. camel/datagen/evol_instruct/scorer.py +24 -25
  92. camel/datagen/evol_instruct/templates.py +48 -48
  93. camel/datagen/self_improving_cot.py +5 -5
  94. camel/datagen/self_instruct/__init__.py +2 -2
  95. camel/datagen/self_instruct/filter/__init__.py +2 -2
  96. camel/datagen/self_instruct/filter/filter_function.py +2 -2
  97. camel/datagen/self_instruct/filter/filter_registry.py +2 -2
  98. camel/datagen/self_instruct/filter/instruction_filter.py +2 -2
  99. camel/datagen/self_instruct/self_instruct.py +2 -2
  100. camel/datagen/self_instruct/templates.py +47 -47
  101. camel/datagen/source2synth/__init__.py +2 -2
  102. camel/datagen/source2synth/data_processor.py +2 -2
  103. camel/datagen/source2synth/models.py +2 -2
  104. camel/datagen/source2synth/user_data_processor_config.py +2 -2
  105. camel/datahubs/__init__.py +2 -2
  106. camel/datahubs/base.py +2 -2
  107. camel/datahubs/huggingface.py +2 -2
  108. camel/datahubs/models.py +2 -2
  109. camel/datasets/__init__.py +2 -2
  110. camel/datasets/base_generator.py +41 -12
  111. camel/datasets/few_shot_generator.py +18 -18
  112. camel/datasets/models.py +3 -3
  113. camel/datasets/self_instruct_generator.py +2 -2
  114. camel/datasets/static_dataset.py +152 -2
  115. camel/embeddings/__init__.py +2 -2
  116. camel/embeddings/azure_embedding.py +2 -2
  117. camel/embeddings/base.py +2 -2
  118. camel/embeddings/gemini_embedding.py +2 -2
  119. camel/embeddings/jina_embedding.py +10 -3
  120. camel/embeddings/mistral_embedding.py +2 -2
  121. camel/embeddings/openai_compatible_embedding.py +2 -2
  122. camel/embeddings/openai_embedding.py +2 -2
  123. camel/embeddings/sentence_transformers_embeddings.py +4 -4
  124. camel/embeddings/together_embedding.py +2 -2
  125. camel/embeddings/vlm_embedding.py +11 -4
  126. camel/environments/__init__.py +14 -2
  127. camel/environments/models.py +2 -2
  128. camel/environments/multi_step.py +2 -2
  129. camel/environments/rlcards_env.py +860 -0
  130. camel/environments/single_step.py +30 -5
  131. camel/environments/tic_tac_toe.py +3 -3
  132. camel/extractors/__init__.py +2 -2
  133. camel/extractors/base.py +2 -2
  134. camel/extractors/python_strategies.py +2 -2
  135. camel/generators.py +2 -2
  136. camel/human.py +2 -2
  137. camel/interpreters/__init__.py +4 -2
  138. camel/interpreters/base.py +16 -3
  139. camel/interpreters/docker/Dockerfile +53 -7
  140. camel/interpreters/docker_interpreter.py +70 -11
  141. camel/interpreters/e2b_interpreter.py +59 -11
  142. camel/interpreters/internal_python_interpreter.py +81 -4
  143. camel/interpreters/interpreter_error.py +2 -2
  144. camel/interpreters/ipython_interpreter.py +23 -5
  145. camel/interpreters/microsandbox_interpreter.py +395 -0
  146. camel/interpreters/subprocess_interpreter.py +36 -4
  147. camel/loaders/__init__.py +17 -5
  148. camel/loaders/apify_reader.py +2 -2
  149. camel/loaders/base_io.py +2 -2
  150. camel/loaders/base_loader.py +85 -0
  151. camel/loaders/chunkr_reader.py +128 -93
  152. camel/loaders/crawl4ai_reader.py +2 -2
  153. camel/loaders/firecrawl_reader.py +6 -6
  154. camel/loaders/jina_url_reader.py +2 -2
  155. camel/loaders/markitdown.py +2 -2
  156. camel/loaders/mineru_extractor.py +2 -2
  157. camel/loaders/mistral_reader.py +148 -0
  158. camel/loaders/scrapegraph_reader.py +2 -2
  159. camel/loaders/unstructured_io.py +2 -2
  160. camel/logger.py +5 -5
  161. camel/memories/__init__.py +2 -2
  162. camel/memories/agent_memories.py +86 -3
  163. camel/memories/base.py +36 -2
  164. camel/memories/blocks/__init__.py +2 -2
  165. camel/memories/blocks/chat_history_block.py +126 -9
  166. camel/memories/blocks/vectordb_block.py +10 -3
  167. camel/memories/context_creators/__init__.py +2 -2
  168. camel/memories/context_creators/score_based.py +31 -239
  169. camel/memories/records.py +98 -13
  170. camel/messages/__init__.py +2 -2
  171. camel/messages/base.py +193 -46
  172. camel/messages/conversion/__init__.py +2 -2
  173. camel/messages/conversion/alpaca.py +2 -2
  174. camel/messages/conversion/conversation_models.py +2 -2
  175. camel/messages/conversion/sharegpt/__init__.py +2 -2
  176. camel/messages/conversion/sharegpt/function_call_formatter.py +2 -2
  177. camel/messages/conversion/sharegpt/hermes/__init__.py +2 -2
  178. camel/messages/conversion/sharegpt/hermes/hermes_function_formatter.py +2 -2
  179. camel/messages/func_message.py +54 -17
  180. camel/models/__init__.py +18 -2
  181. camel/models/_utils.py +3 -3
  182. camel/models/aihubmix_model.py +83 -0
  183. camel/models/aiml_model.py +11 -18
  184. camel/models/amd_model.py +101 -0
  185. camel/models/anthropic_model.py +127 -20
  186. camel/models/aws_bedrock_model.py +12 -35
  187. camel/models/azure_openai_model.py +263 -63
  188. camel/models/base_audio_model.py +5 -3
  189. camel/models/base_model.py +195 -26
  190. camel/models/cerebras_model.py +83 -0
  191. camel/models/cohere_model.py +81 -21
  192. camel/models/cometapi_model.py +83 -0
  193. camel/models/crynux_model.py +87 -0
  194. camel/models/deepseek_model.py +61 -59
  195. camel/models/fish_audio_model.py +8 -2
  196. camel/models/gemini_model.py +439 -30
  197. camel/models/groq_model.py +11 -19
  198. camel/models/internlm_model.py +11 -18
  199. camel/models/litellm_model.py +94 -34
  200. camel/models/lmstudio_model.py +17 -20
  201. camel/models/minimax_model.py +83 -0
  202. camel/models/mistral_model.py +84 -19
  203. camel/models/model_factory.py +49 -6
  204. camel/models/model_manager.py +33 -11
  205. camel/models/modelscope_model.py +13 -193
  206. camel/models/moonshot_model.py +195 -21
  207. camel/models/nebius_model.py +83 -0
  208. camel/models/nemotron_model.py +19 -9
  209. camel/models/netmind_model.py +11 -18
  210. camel/models/novita_model.py +11 -18
  211. camel/models/nvidia_model.py +11 -18
  212. camel/models/ollama_model.py +14 -21
  213. camel/models/openai_audio_models.py +2 -2
  214. camel/models/openai_compatible_model.py +234 -27
  215. camel/models/openai_model.py +255 -39
  216. camel/models/openrouter_model.py +11 -19
  217. camel/models/ppio_model.py +11 -18
  218. camel/models/qianfan_model.py +89 -0
  219. camel/models/qwen_model.py +13 -193
  220. camel/models/reka_model.py +90 -21
  221. camel/models/reward/__init__.py +2 -2
  222. camel/models/reward/base_reward_model.py +2 -2
  223. camel/models/reward/evaluator.py +2 -2
  224. camel/models/reward/nemotron_model.py +2 -2
  225. camel/models/reward/skywork_model.py +2 -2
  226. camel/models/samba_model.py +117 -49
  227. camel/models/sglang_model.py +162 -42
  228. camel/models/siliconflow_model.py +12 -35
  229. camel/models/stub_model.py +10 -7
  230. camel/models/togetherai_model.py +11 -18
  231. camel/models/vllm_model.py +10 -18
  232. camel/models/volcano_model.py +16 -20
  233. camel/models/watsonx_model.py +69 -19
  234. camel/models/yi_model.py +11 -18
  235. camel/models/zhipuai_model.py +70 -18
  236. camel/parsers/__init__.py +18 -0
  237. camel/parsers/mcp_tool_call_parser.py +176 -0
  238. camel/personas/__init__.py +2 -2
  239. camel/personas/persona.py +2 -2
  240. camel/personas/persona_hub.py +2 -2
  241. camel/prompts/__init__.py +2 -2
  242. camel/prompts/ai_society.py +2 -2
  243. camel/prompts/base.py +2 -2
  244. camel/prompts/code.py +2 -2
  245. camel/prompts/evaluation.py +2 -2
  246. camel/prompts/generate_text_embedding_data.py +2 -2
  247. camel/prompts/image_craft.py +2 -2
  248. camel/prompts/misalignment.py +2 -2
  249. camel/prompts/multi_condition_image_craft.py +2 -2
  250. camel/prompts/object_recognition.py +2 -2
  251. camel/prompts/persona_hub.py +3 -3
  252. camel/prompts/prompt_templates.py +2 -2
  253. camel/prompts/role_description_prompt_template.py +2 -2
  254. camel/prompts/solution_extraction.py +8 -8
  255. camel/prompts/task_prompt_template.py +2 -2
  256. camel/prompts/translation.py +2 -2
  257. camel/prompts/video_description_prompt.py +3 -3
  258. camel/responses/__init__.py +2 -2
  259. camel/responses/agent_responses.py +2 -2
  260. camel/retrievers/__init__.py +2 -2
  261. camel/retrievers/auto_retriever.py +23 -3
  262. camel/retrievers/base.py +2 -2
  263. camel/retrievers/bm25_retriever.py +3 -4
  264. camel/retrievers/cohere_rerank_retriever.py +2 -2
  265. camel/retrievers/hybrid_retrival.py +4 -4
  266. camel/retrievers/vector_retriever.py +2 -2
  267. camel/runtimes/Dockerfile.multi-toolkit +90 -0
  268. camel/{runtime → runtimes}/__init__.py +2 -2
  269. camel/runtimes/api.py +153 -0
  270. camel/{runtime → runtimes}/base.py +2 -2
  271. camel/{runtime → runtimes}/configs.py +13 -13
  272. camel/{runtime → runtimes}/daytona_runtime.py +18 -19
  273. camel/{runtime → runtimes}/docker_runtime.py +13 -13
  274. camel/{runtime → runtimes}/llm_guard_runtime.py +28 -28
  275. camel/{runtime → runtimes}/remote_http_runtime.py +12 -12
  276. camel/{runtime → runtimes}/ubuntu_docker_runtime.py +3 -3
  277. camel/{runtime → runtimes}/utils/__init__.py +2 -2
  278. camel/{runtime → runtimes}/utils/function_risk_toolkit.py +2 -2
  279. camel/{runtime → runtimes}/utils/ignore_risk_toolkit.py +2 -2
  280. camel/schemas/__init__.py +2 -2
  281. camel/schemas/base.py +2 -2
  282. camel/schemas/openai_converter.py +3 -3
  283. camel/schemas/outlines_converter.py +2 -2
  284. camel/services/agent_openapi_server.py +380 -0
  285. camel/societies/__init__.py +4 -2
  286. camel/societies/babyagi_playing.py +2 -2
  287. camel/societies/role_playing.py +201 -80
  288. camel/societies/workforce/__init__.py +10 -3
  289. camel/societies/workforce/base.py +9 -5
  290. camel/societies/workforce/events.py +143 -0
  291. camel/societies/workforce/prompts.py +258 -33
  292. camel/societies/workforce/role_playing_worker.py +95 -30
  293. camel/societies/workforce/single_agent_worker.py +659 -30
  294. camel/societies/workforce/structured_output_handler.py +512 -0
  295. camel/societies/workforce/task_channel.py +182 -38
  296. camel/societies/workforce/utils.py +784 -18
  297. camel/societies/workforce/worker.py +96 -28
  298. camel/societies/workforce/workflow_memory_manager.py +1746 -0
  299. camel/societies/workforce/workforce.py +5730 -366
  300. camel/societies/workforce/workforce_callback.py +103 -0
  301. camel/societies/workforce/workforce_logger.py +647 -0
  302. camel/societies/workforce/workforce_metrics.py +33 -0
  303. camel/storages/__init__.py +10 -2
  304. camel/storages/graph_storages/__init__.py +2 -2
  305. camel/storages/graph_storages/base.py +2 -2
  306. camel/storages/graph_storages/graph_element.py +2 -2
  307. camel/storages/graph_storages/nebula_graph.py +4 -4
  308. camel/storages/graph_storages/neo4j_graph.py +7 -7
  309. camel/storages/key_value_storages/__init__.py +2 -2
  310. camel/storages/key_value_storages/base.py +2 -2
  311. camel/storages/key_value_storages/in_memory.py +2 -2
  312. camel/storages/key_value_storages/json.py +17 -4
  313. camel/storages/key_value_storages/mem0_cloud.py +50 -49
  314. camel/storages/key_value_storages/redis.py +2 -2
  315. camel/storages/object_storages/__init__.py +2 -2
  316. camel/storages/object_storages/amazon_s3.py +2 -2
  317. camel/storages/object_storages/azure_blob.py +2 -2
  318. camel/storages/object_storages/base.py +2 -2
  319. camel/storages/object_storages/google_cloud.py +3 -3
  320. camel/storages/vectordb_storages/__init__.py +12 -2
  321. camel/storages/vectordb_storages/base.py +2 -2
  322. camel/storages/vectordb_storages/chroma.py +731 -0
  323. camel/storages/vectordb_storages/faiss.py +712 -0
  324. camel/storages/vectordb_storages/milvus.py +2 -2
  325. camel/storages/vectordb_storages/oceanbase.py +16 -17
  326. camel/storages/vectordb_storages/pgvector.py +349 -0
  327. camel/storages/vectordb_storages/qdrant.py +6 -6
  328. camel/storages/vectordb_storages/surreal.py +372 -0
  329. camel/storages/vectordb_storages/tidb.py +11 -8
  330. camel/storages/vectordb_storages/weaviate.py +714 -0
  331. camel/tasks/__init__.py +2 -2
  332. camel/tasks/task.py +366 -27
  333. camel/tasks/task_prompt.py +3 -3
  334. camel/terminators/__init__.py +2 -2
  335. camel/terminators/base.py +2 -2
  336. camel/terminators/response_terminator.py +2 -2
  337. camel/terminators/token_limit_terminator.py +2 -2
  338. camel/toolkits/__init__.py +58 -10
  339. camel/toolkits/aci_toolkit.py +66 -21
  340. camel/toolkits/arxiv_toolkit.py +8 -8
  341. camel/toolkits/ask_news_toolkit.py +2 -2
  342. camel/toolkits/async_browser_toolkit.py +174 -575
  343. camel/toolkits/audio_analysis_toolkit.py +3 -3
  344. camel/toolkits/base.py +65 -7
  345. camel/toolkits/bohrium_toolkit.py +318 -0
  346. camel/toolkits/browser_toolkit.py +306 -566
  347. camel/toolkits/browser_toolkit_commons.py +568 -0
  348. camel/toolkits/code_execution.py +67 -11
  349. camel/toolkits/context_summarizer_toolkit.py +684 -0
  350. camel/toolkits/craw4ai_toolkit.py +93 -0
  351. camel/toolkits/dappier_toolkit.py +12 -8
  352. camel/toolkits/data_commons_toolkit.py +2 -2
  353. camel/toolkits/dingtalk.py +1135 -0
  354. camel/toolkits/earth_science_toolkit.py +5367 -0
  355. camel/toolkits/edgeone_pages_mcp_toolkit.py +49 -0
  356. camel/toolkits/excel_toolkit.py +910 -70
  357. camel/toolkits/file_toolkit.py +1402 -0
  358. camel/toolkits/function_tool.py +128 -20
  359. camel/toolkits/github_toolkit.py +148 -43
  360. camel/toolkits/gmail_toolkit.py +1839 -0
  361. camel/toolkits/google_calendar_toolkit.py +40 -6
  362. camel/toolkits/google_drive_mcp_toolkit.py +54 -0
  363. camel/toolkits/google_maps_toolkit.py +2 -2
  364. camel/toolkits/google_scholar_toolkit.py +2 -2
  365. camel/toolkits/human_toolkit.py +36 -12
  366. camel/toolkits/hybrid_browser_toolkit/__init__.py +18 -0
  367. camel/toolkits/hybrid_browser_toolkit/config_loader.py +185 -0
  368. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +246 -0
  369. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +1973 -0
  370. camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
  371. camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +4589 -0
  372. camel/toolkits/hybrid_browser_toolkit/ts/package.json +33 -0
  373. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-scripts.js +125 -0
  374. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +1929 -0
  375. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +233 -0
  376. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +589 -0
  377. camel/toolkits/hybrid_browser_toolkit/ts/src/index.ts +7 -0
  378. camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
  379. camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
  380. camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
  381. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +129 -0
  382. camel/toolkits/hybrid_browser_toolkit/ts/tsconfig.json +27 -0
  383. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +319 -0
  384. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +1037 -0
  385. camel/toolkits/hybrid_browser_toolkit_py/__init__.py +17 -0
  386. camel/toolkits/hybrid_browser_toolkit_py/actions.py +575 -0
  387. camel/toolkits/hybrid_browser_toolkit_py/agent.py +311 -0
  388. camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +787 -0
  389. camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +490 -0
  390. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +2390 -0
  391. camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +233 -0
  392. camel/toolkits/hybrid_browser_toolkit_py/stealth_script.js +0 -0
  393. camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +1043 -0
  394. camel/toolkits/image_analysis_toolkit.py +3 -3
  395. camel/toolkits/image_generation_toolkit.py +390 -0
  396. camel/toolkits/jina_reranker_toolkit.py +195 -79
  397. camel/toolkits/klavis_toolkit.py +7 -3
  398. camel/toolkits/linkedin_toolkit.py +2 -2
  399. camel/toolkits/markitdown_toolkit.py +104 -0
  400. camel/toolkits/math_toolkit.py +66 -12
  401. camel/toolkits/mcp_toolkit.py +841 -600
  402. camel/toolkits/memory_toolkit.py +7 -3
  403. camel/toolkits/meshy_toolkit.py +2 -2
  404. camel/toolkits/message_agent_toolkit.py +608 -0
  405. camel/toolkits/message_integration.py +724 -0
  406. camel/toolkits/mineru_toolkit.py +2 -2
  407. camel/toolkits/minimax_mcp_toolkit.py +195 -0
  408. camel/toolkits/networkx_toolkit.py +2 -2
  409. camel/toolkits/note_taking_toolkit.py +277 -0
  410. camel/toolkits/notion_mcp_toolkit.py +224 -0
  411. camel/toolkits/notion_toolkit.py +2 -2
  412. camel/toolkits/open_api_specs/biztoc/__init__.py +2 -2
  413. camel/toolkits/open_api_specs/biztoc/ai-plugin.json +1 -1
  414. camel/toolkits/open_api_specs/coursera/__init__.py +2 -2
  415. camel/toolkits/open_api_specs/create_qr_code/__init__.py +2 -2
  416. camel/toolkits/open_api_specs/klarna/__init__.py +2 -2
  417. camel/toolkits/open_api_specs/nasa_apod/__init__.py +2 -2
  418. camel/toolkits/open_api_specs/outschool/__init__.py +2 -2
  419. camel/toolkits/open_api_specs/outschool/ai-plugin.json +1 -1
  420. camel/toolkits/open_api_specs/outschool/openapi.yaml +1 -1
  421. camel/toolkits/open_api_specs/outschool/paths/__init__.py +2 -2
  422. camel/toolkits/open_api_specs/outschool/paths/get_classes.py +2 -2
  423. camel/toolkits/open_api_specs/outschool/paths/search_teachers.py +2 -2
  424. camel/toolkits/open_api_specs/security_config.py +2 -2
  425. camel/toolkits/open_api_specs/speak/__init__.py +2 -2
  426. camel/toolkits/open_api_specs/web_scraper/__init__.py +2 -2
  427. camel/toolkits/open_api_specs/web_scraper/ai-plugin.json +1 -1
  428. camel/toolkits/open_api_specs/web_scraper/paths/__init__.py +2 -2
  429. camel/toolkits/open_api_specs/web_scraper/paths/scraper.py +2 -2
  430. camel/toolkits/open_api_toolkit.py +2 -2
  431. camel/toolkits/openbb_toolkit.py +7 -3
  432. camel/toolkits/origene_mcp_toolkit.py +56 -0
  433. camel/toolkits/page_script.js +86 -74
  434. camel/toolkits/playwright_mcp_toolkit.py +27 -32
  435. camel/toolkits/pptx_toolkit.py +790 -0
  436. camel/toolkits/pubmed_toolkit.py +2 -2
  437. camel/toolkits/pulse_mcp_search_toolkit.py +2 -2
  438. camel/toolkits/pyautogui_toolkit.py +2 -2
  439. camel/toolkits/reddit_toolkit.py +2 -2
  440. camel/toolkits/resend_toolkit.py +168 -0
  441. camel/toolkits/retrieval_toolkit.py +2 -2
  442. camel/toolkits/screenshot_toolkit.py +213 -0
  443. camel/toolkits/search_toolkit.py +539 -146
  444. camel/toolkits/searxng_toolkit.py +2 -2
  445. camel/toolkits/semantic_scholar_toolkit.py +2 -2
  446. camel/toolkits/slack_toolkit.py +108 -58
  447. camel/toolkits/sql_toolkit.py +712 -0
  448. camel/toolkits/stripe_toolkit.py +2 -2
  449. camel/toolkits/sympy_toolkit.py +3 -3
  450. camel/toolkits/task_planning_toolkit.py +134 -0
  451. camel/toolkits/terminal_toolkit/__init__.py +18 -0
  452. camel/toolkits/terminal_toolkit/terminal_toolkit.py +1070 -0
  453. camel/toolkits/terminal_toolkit/utils.py +532 -0
  454. camel/toolkits/thinking_toolkit.py +3 -3
  455. camel/toolkits/twitter_toolkit.py +8 -3
  456. camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
  457. camel/toolkits/video_analysis_toolkit.py +112 -29
  458. camel/toolkits/video_download_toolkit.py +22 -16
  459. camel/toolkits/weather_toolkit.py +2 -2
  460. camel/toolkits/web_deploy_toolkit.py +1219 -0
  461. camel/toolkits/wechat_official_toolkit.py +483 -0
  462. camel/toolkits/whatsapp_toolkit.py +2 -2
  463. camel/toolkits/wolfram_alpha_toolkit.py +53 -25
  464. camel/toolkits/zapier_toolkit.py +7 -3
  465. camel/types/__init__.py +4 -4
  466. camel/types/agents/__init__.py +2 -2
  467. camel/types/agents/tool_calling_record.py +6 -3
  468. camel/types/enums.py +454 -35
  469. camel/types/mcp_registries.py +2 -2
  470. camel/types/openai_types.py +4 -4
  471. camel/types/unified_model_type.py +43 -6
  472. camel/utils/__init__.py +20 -2
  473. camel/utils/async_func.py +2 -2
  474. camel/utils/chunker/__init__.py +2 -2
  475. camel/utils/chunker/base.py +2 -2
  476. camel/utils/chunker/code_chunker.py +2 -2
  477. camel/utils/chunker/uio_chunker.py +2 -2
  478. camel/utils/commons.py +65 -7
  479. camel/utils/constants.py +5 -2
  480. camel/utils/context_utils.py +1134 -0
  481. camel/utils/deduplication.py +2 -2
  482. camel/utils/filename.py +2 -2
  483. camel/utils/langfuse.py +258 -0
  484. camel/utils/mcp.py +140 -6
  485. camel/utils/mcp_client.py +1056 -0
  486. camel/utils/message_summarizer.py +148 -0
  487. camel/utils/response_format.py +2 -2
  488. camel/utils/token_counting.py +45 -22
  489. camel/utils/tool_result.py +44 -0
  490. camel/verifiers/__init__.py +2 -2
  491. camel/verifiers/base.py +2 -2
  492. camel/verifiers/math_verifier.py +2 -2
  493. camel/verifiers/models.py +2 -2
  494. camel/verifiers/physics_verifier.py +2 -2
  495. camel/verifiers/python_verifier.py +2 -2
  496. {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/METADATA +349 -108
  497. camel_ai-0.2.82.dist-info/RECORD +507 -0
  498. {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/WHEEL +1 -1
  499. {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/licenses/LICENSE +1 -1
  500. camel/loaders/pandas_reader.py +0 -368
  501. camel/runtime/api.py +0 -97
  502. camel/toolkits/dalle_toolkit.py +0 -171
  503. camel/toolkits/file_write_toolkit.py +0 -395
  504. camel/toolkits/openai_agent_toolkit.py +0 -135
  505. camel/toolkits/terminal_toolkit.py +0 -1037
  506. camel_ai-0.2.59.dist-info/RECORD +0 -410
@@ -0,0 +1,2390 @@
1
+ # ========= Copyright 2023-2025 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2025 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ import datetime
16
+ import io
17
+ import json
18
+ import os
19
+ import time
20
+ import urllib.parse
21
+ from functools import wraps
22
+ from typing import Any, Callable, ClassVar, Dict, List, Optional, cast
23
+
24
+ from camel.logger import get_logger
25
+ from camel.models import BaseModelBackend
26
+ from camel.toolkits.base import BaseToolkit, RegisteredAgentToolkit
27
+ from camel.toolkits.function_tool import FunctionTool
28
+ from camel.utils import sanitize_filename
29
+ from camel.utils.commons import dependencies_required
30
+
31
+ from .agent import PlaywrightLLMAgent
32
+ from .browser_session import HybridBrowserSession
33
+ from .config_loader import ConfigLoader
34
+
35
+ logger = get_logger(__name__)
36
+
37
+
38
+ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
39
+ r"""A hybrid browser toolkit that combines non-visual, DOM-based browser
40
+ automation with visual, screenshot-based capabilities.
41
+
42
+ This toolkit exposes a set of actions as CAMEL FunctionTools for agents
43
+ to interact with web pages. It can operate in headless mode and supports
44
+ both programmatic control of browser actions (like clicking and typing)
45
+ and visual analysis of the page layout through screenshots with marked
46
+ interactive elements.
47
+ """
48
+
49
# Default tool list - core browser functionality.
# Used when ``enabled_tools`` is not passed to ``__init__``. Every name
# listed here also appears in ALL_TOOLS below.
DEFAULT_TOOLS: ClassVar[List[str]] = [
    "browser_open",
    "browser_close",
    "browser_visit_page",
    "browser_back",
    "browser_forward",
    "browser_click",
    "browser_type",
    "browser_switch_tab",
]

# All available tools.
# ``__init__`` validates ``enabled_tools`` against this list and raises
# ``ValueError`` for any name that is not in it.
ALL_TOOLS: ClassVar[List[str]] = [
    "browser_open",
    "browser_close",
    "browser_visit_page",
    "browser_back",
    "browser_forward",
    "browser_get_page_snapshot",
    "browser_get_som_screenshot",
    "browser_get_page_links",
    "browser_click",
    "browser_type",
    "browser_select",
    "browser_scroll",
    "browser_enter",
    "browser_mouse_control",
    "browser_mouse_drag",
    "browser_press_key",
    "browser_wait_user",
    "browser_solve_task",
    "browser_switch_tab",
    "browser_close_tab",
    "browser_get_tab_info",
    "browser_console_view",
    "browser_console_exec",
]
87
+
88
def __init__(
    self,
    *,
    headless: bool = True,
    user_data_dir: Optional[str] = None,
    stealth: bool = False,
    web_agent_model: Optional[BaseModelBackend] = None,
    cache_dir: Optional[str] = None,
    enabled_tools: Optional[List[str]] = None,
    browser_log_to_file: bool = False,
    log_dir: Optional[str] = None,
    session_id: Optional[str] = None,
    default_start_url: Optional[str] = None,
    default_timeout: Optional[int] = None,
    short_timeout: Optional[int] = None,
    navigation_timeout: Optional[int] = None,
    network_idle_timeout: Optional[int] = None,
    screenshot_timeout: Optional[int] = None,
    page_stability_timeout: Optional[int] = None,
    dom_content_loaded_timeout: Optional[int] = None,
    viewport_limit: bool = False,
) -> None:
    r"""Initialize the HybridBrowserToolkit.

    Args:
        headless (bool): Whether to run the browser in headless mode.
            Defaults to `True`.
        user_data_dir (Optional[str]): Path to a directory for storing
            browser data like cookies and local storage. Useful for
            maintaining sessions across runs. Defaults to `None` (a
            temporary directory is used).
        stealth (bool): Whether to run the browser in stealth mode to
            avoid bot detection. When enabled, hides WebDriver
            characteristics, spoofs navigator properties, and implements
            various anti-detection measures. Highly recommended for
            production use and when accessing sites with bot detection.
            Defaults to `False`.
        web_agent_model (Optional[BaseModelBackend]): The language model
            backend to use for the high-level `solve_task` agent. This is
            required only if you plan to use `solve_task`.
            Defaults to `None`.
        cache_dir (Optional[str]): The directory to store cached files,
            such as screenshots. When `None` or empty, `"tmp/"` is used.
            Defaults to `None`.
        enabled_tools (Optional[List[str]]): List of tool names to
            enable. If None, a copy of `DEFAULT_TOOLS` is used. Every
            name must be listed in `ALL_TOOLS`. Defaults to `None`.
        browser_log_to_file (bool): Whether to save detailed browser
            action logs to file. When enabled, logs action
            inputs/outputs, execution times, and page loading times.
            Logs are saved to an auto-generated timestamped file.
            Defaults to `False`.
        log_dir (Optional[str]): Custom directory path for log files.
            If None, defaults to "browser_log". Defaults to `None`.
        session_id (Optional[str]): A unique identifier for this browser
            session. When multiple HybridBrowserToolkit instances are
            used concurrently, different session IDs prevent them from
            sharing the same browser session and causing conflicts. If
            None, a default session will be used. Defaults to `None`.
        default_start_url (Optional[str]): The default URL to navigate
            to when open_browser() is called without a start_url
            parameter or with None. When `None` or empty,
            `"https://google.com/"` is used. Defaults to `None`.
        default_timeout (Optional[int]): Default timeout in milliseconds
            for browser actions. If None, uses environment variable
            HYBRID_BROWSER_DEFAULT_TIMEOUT or defaults to 3000ms.
            Defaults to `None`.
        short_timeout (Optional[int]): Short timeout in milliseconds
            for quick browser actions. If None, uses environment variable
            HYBRID_BROWSER_SHORT_TIMEOUT or defaults to 1000ms.
            Defaults to `None`.
        navigation_timeout (Optional[int]): Custom navigation timeout in
            milliseconds. If None, uses environment variable
            HYBRID_BROWSER_NAVIGATION_TIMEOUT or defaults to 10000ms.
            Defaults to `None`.
        network_idle_timeout (Optional[int]): Custom network idle
            timeout in milliseconds. If None, uses environment variable
            HYBRID_BROWSER_NETWORK_IDLE_TIMEOUT or defaults to 5000ms.
            Defaults to `None`.
        screenshot_timeout (Optional[int]): Custom screenshot timeout in
            milliseconds. If None, uses environment variable
            HYBRID_BROWSER_SCREENSHOT_TIMEOUT or defaults to 15000ms.
            Defaults to `None`.
        page_stability_timeout (Optional[int]): Custom page stability
            timeout in milliseconds. If None, uses environment variable
            HYBRID_BROWSER_PAGE_STABILITY_TIMEOUT or defaults to 1500ms.
            Defaults to `None`.
        dom_content_loaded_timeout (Optional[int]): Custom DOM content
            loaded timeout in milliseconds. If None, uses environment
            variable HYBRID_BROWSER_DOM_CONTENT_LOADED_TIMEOUT or
            defaults to 5000ms. Defaults to `None`.
        viewport_limit (bool): When True, only return snapshot results
            visible in the current viewport. When False, return all
            elements on the page regardless of visibility.
            Defaults to `False`.

    Raises:
        ValueError: If `enabled_tools` contains a name that is not
            listed in `ALL_TOOLS`.
    """
    super().__init__()
    RegisteredAgentToolkit.__init__(self)
    self._headless = headless
    self._user_data_dir = user_data_dir
    self._stealth = stealth
    self._web_agent_model = web_agent_model
    self._cache_dir = cache_dir or "tmp/"
    self._browser_log_to_file = browser_log_to_file
    self._log_dir = log_dir
    self._default_start_url = default_start_url or "https://google.com/"
    self._session_id = session_id or "default"
    self._viewport_limit = viewport_limit

    # Store timeout configuration. The two generic timeouts are kept raw
    # and handed to the session; the others are resolved by ConfigLoader.
    self._default_timeout = default_timeout
    self._short_timeout = short_timeout
    self._navigation_timeout = ConfigLoader.get_navigation_timeout(
        navigation_timeout
    )
    self._network_idle_timeout = ConfigLoader.get_network_idle_timeout(
        network_idle_timeout
    )
    self._screenshot_timeout = ConfigLoader.get_screenshot_timeout(
        screenshot_timeout
    )
    self._page_stability_timeout = ConfigLoader.get_page_stability_timeout(
        page_stability_timeout
    )
    self._dom_content_loaded_timeout = (
        ConfigLoader.get_dom_content_loaded_timeout(
            dom_content_loaded_timeout
        )
    )

    # Logging configuration - fixed values for simplicity
    self.enable_action_logging = True
    self.enable_timing_logging = True
    self.enable_page_loading_logging = True
    self.log_to_console = False  # Always disabled for cleaner output
    self.log_to_file = browser_log_to_file
    self.max_log_length: Optional[int] = None  # No truncation

    # Set up log file if needed
    if self.log_to_file:
        # Create log directory if it doesn't exist
        log_dir = self._log_dir if self._log_dir else "browser_log"
        os.makedirs(log_dir, exist_ok=True)

        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        # Use the normalised session id so that the default session is
        # named "..._default.log" rather than "..._None.log".
        self.log_file_path: Optional[str] = os.path.join(
            log_dir,
            f"hybrid_browser_toolkit_{timestamp}_{self._session_id}.log",
        )
    else:
        self.log_file_path = None

    # Initialize log buffer for in-memory storage
    self.log_buffer: List[Dict[str, Any]] = []

    # Configure enabled tools
    if enabled_tools is None:
        self.enabled_tools = self.DEFAULT_TOOLS.copy()
    else:
        # Validate enabled tools
        invalid_tools = [
            tool for tool in enabled_tools if tool not in self.ALL_TOOLS
        ]
        if invalid_tools:
            raise ValueError(
                f"Invalid tools specified: {invalid_tools}. "
                f"Available tools: {self.ALL_TOOLS}"
            )
        self.enabled_tools = enabled_tools.copy()

    logger.info(f"Enabled tools: {self.enabled_tools}")

    # Log initialization if file logging is enabled
    if self.log_to_file:
        logger.info(
            "HybridBrowserToolkit initialized with file logging enabled"
        )
        logger.info(f"Log file path: {self.log_file_path}")

    # Core components
    temp_session = HybridBrowserSession(
        headless=headless,
        user_data_dir=user_data_dir,
        stealth=stealth,
        session_id=session_id,
        default_timeout=default_timeout,
        short_timeout=short_timeout,
    )
    # Use the session directly - singleton logic is handled in
    # ensure_browser
    self._session = temp_session
    self._playwright_agent: Optional[PlaywrightLLMAgent] = None
    self._unified_script = self._load_unified_analyzer()
295
+
296
@property
def web_agent_model(self) -> Optional[BaseModelBackend]:
    r"""Model backend stored for the web agent, or `None` if unset."""
    return self._web_agent_model

@web_agent_model.setter
def web_agent_model(self, value: Optional[BaseModelBackend]) -> None:
    r"""Replace the stored web agent model backend with `value`."""
    self._web_agent_model = value
305
+
306
@property
def cache_dir(self) -> str:
    r"""Directory path stored for cached files (read-only)."""
    return self._cache_dir
310
+
311
+ def __del__(self):
312
+ r"""Cleanup browser resources on garbage collection."""
313
+ try:
314
+ import sys
315
+
316
+ if getattr(sys, "is_finalizing", lambda: False)():
317
+ return
318
+
319
+ import asyncio
320
+
321
+ try:
322
+ loop = asyncio.get_event_loop()
323
+ if not loop.is_closed() and not loop.is_running():
324
+ # Try to close browser with a timeout to prevent hanging
325
+ try:
326
+ loop.run_until_complete(
327
+ asyncio.wait_for(self.browser_close(), timeout=2.0)
328
+ )
329
+ except asyncio.TimeoutError:
330
+ pass # Skip cleanup if it takes too long
331
+ except (RuntimeError, ImportError):
332
+ pass # Event loop unavailable, skip cleanup
333
+ except Exception:
334
+ pass # Suppress all errors during garbage collection
335
+
336
+ def _load_unified_analyzer(self) -> str:
337
+ r"""Load the unified analyzer JavaScript script."""
338
+ script_path = os.path.join(
339
+ os.path.dirname(os.path.abspath(__file__)), "unified_analyzer.js"
340
+ )
341
+
342
+ try:
343
+ with open(
344
+ script_path, "r", encoding='utf-8', errors='replace'
345
+ ) as f:
346
+ script_content = f.read()
347
+
348
+ if not script_content.strip():
349
+ raise ValueError(f"Script is empty: {script_path}")
350
+
351
+ logger.debug(
352
+ f"Loaded unified analyzer ({len(script_content)} chars)"
353
+ )
354
+ return script_content
355
+ except FileNotFoundError:
356
+ raise FileNotFoundError(f"Script not found: {script_path}")
357
+
358
+ def _validate_ref(self, ref: str, method_name: str) -> None:
359
+ r"""Validate ref parameter."""
360
+ if not ref or not isinstance(ref, str):
361
+ raise ValueError(
362
+ f"{method_name}: 'ref' must be a non-empty string"
363
+ )
364
+
365
+ def _truncate_if_needed(self, content: Any) -> str:
366
+ r"""Truncate content if max_log_length is set."""
367
+ content_str = str(content)
368
+ if (
369
+ self.max_log_length is not None
370
+ and len(content_str) > self.max_log_length
371
+ ):
372
+ return content_str[: self.max_log_length] + "... [TRUNCATED]"
373
+ return content_str
374
+
375
+ async def _get_current_url(self) -> Optional[str]:
376
+ r"""Safely get the current URL of the active page."""
377
+ try:
378
+ page = await self._session.get_page()
379
+ if page and not page.is_closed():
380
+ return page.url
381
+ return None # Return None if page is closed
382
+ except Exception:
383
+ # This can happen if browser is not open.
384
+ return None
385
+
386
+ async def _log_action(
387
+ self,
388
+ action_name: str,
389
+ inputs: Dict[str, Any],
390
+ outputs: Any,
391
+ execution_time: float,
392
+ page_load_time: Optional[float] = None,
393
+ error: Optional[str] = None,
394
+ ) -> None:
395
+ r"""Log action details with comprehensive information."""
396
+ if not (self.enable_action_logging or self.enable_timing_logging):
397
+ return
398
+
399
+ current_url = await self._get_current_url()
400
+
401
+ log_entry: Dict[str, Any] = {
402
+ "timestamp": datetime.datetime.now().isoformat(),
403
+ "action": action_name,
404
+ "url": current_url,
405
+ "execution_time_ms": round(execution_time * 1000, 2),
406
+ }
407
+
408
+ if self.enable_action_logging:
409
+ log_entry["inputs"] = inputs
410
+ if error:
411
+ log_entry["error"] = str(error)
412
+ elif isinstance(outputs, dict):
413
+ # Unpack dictionary items into the log entry
414
+ log_entry.update(outputs)
415
+ else:
416
+ # For non-dict outputs, assign to 'outputs' key
417
+ log_entry["outputs"] = outputs
418
+
419
+ if page_load_time is not None and self.enable_page_loading_logging:
420
+ log_entry["page_load_time_ms"] = round(page_load_time * 1000, 2)
421
+
422
+ # Add to buffer
423
+ self.log_buffer.append(log_entry)
424
+
425
+ # Console logging
426
+ if self.log_to_console:
427
+ log_msg = f"[BROWSER ACTION] {action_name}"
428
+ if self.enable_timing_logging:
429
+ log_msg += (
430
+ f" | Execution: " f"{log_entry['execution_time_ms']}ms"
431
+ )
432
+ if page_load_time is not None and self.enable_page_loading_logging:
433
+ log_msg += (
434
+ f" | Page Load: " f"{log_entry['page_load_time_ms']}ms"
435
+ )
436
+ if error:
437
+ log_msg += f" | ERROR: {error}"
438
+
439
+ logger.info(log_msg)
440
+
441
+ if self.enable_action_logging:
442
+ logger.info(f" Inputs: {self._truncate_if_needed(inputs)}")
443
+ if not error:
444
+ if isinstance(outputs, dict):
445
+ for key, value in outputs.items():
446
+ logger.info(
447
+ f" - {key}: "
448
+ f"{self._truncate_if_needed(value)}"
449
+ )
450
+ else:
451
+ logger.info(
452
+ f" Outputs: {self._truncate_if_needed(outputs)}"
453
+ )
454
+
455
+ # File logging
456
+ if self.log_to_file and self.log_file_path:
457
+ try:
458
+ with open(self.log_file_path, 'a', encoding='utf-8') as f:
459
+ # Write full log entry to file without truncation
460
+ f.write(
461
+ json.dumps(log_entry, ensure_ascii=False, indent=2)
462
+ + '\n'
463
+ )
464
+ except Exception as e:
465
+ logger.error(f"Failed to write to log file: {e}")
466
+
467
+ @staticmethod
468
+ def action_logger(func: Callable[..., Any]) -> Callable[..., Any]:
469
+ r"""Decorator to add logging to action methods."""
470
+
471
+ @wraps(func)
472
+ async def wrapper(self, *args, **kwargs):
473
+ action_name = func.__name__
474
+ start_time = time.time()
475
+
476
+ # Log inputs
477
+ inputs = {
478
+ "args": args, # Don't skip self since it's already handled
479
+ "kwargs": kwargs,
480
+ }
481
+
482
+ try:
483
+ # Execute the original function
484
+ result = await func(self, *args, **kwargs)
485
+ execution_time = time.time() - start_time
486
+
487
+ # Log success
488
+ await self._log_action(
489
+ action_name=action_name,
490
+ inputs=inputs,
491
+ outputs=result,
492
+ execution_time=execution_time,
493
+ )
494
+
495
+ return result
496
+
497
+ except Exception as e:
498
+ execution_time = time.time() - start_time
499
+ error_msg = f"{type(e).__name__}: {e!s}"
500
+
501
+ # Log error
502
+ await self._log_action(
503
+ action_name=action_name,
504
+ inputs=inputs,
505
+ outputs=None,
506
+ execution_time=execution_time,
507
+ error=error_msg,
508
+ )
509
+
510
+ raise
511
+
512
+ return wrapper
513
+
514
async def _get_session(self) -> "HybridBrowserSession":
    r"""Return the singleton session, adopting it if it differs.

    The stored session is passed to
    `HybridBrowserSession._get_or_create_instance`. When a different
    object comes back, it replaces `self._session`.

    Returns:
        HybridBrowserSession: The session now stored on this toolkit.
    """
    shared = await HybridBrowserSession._get_or_create_instance(
        self._session
    )
    if shared is self._session:
        return self._session

    logger.debug("Updating to singleton session instance")
    self._session = shared
    return self._session
523
+
524
+ async def _ensure_browser(self):
525
+ # Get singleton instance and update self._session if needed
526
+ session = await self._get_session()
527
+ await session.ensure_browser()
528
+
529
+ async def _require_page(self):
530
+ # Get singleton instance and update self._session if needed
531
+ session = await self._get_session()
532
+ await session.ensure_browser()
533
+ return await session.get_page()
534
+
535
async def _wait_for_page_stability(self):
    r"""Best-effort wait for the page to settle after an action.

    Waits for the `domcontentloaded` state, then tries for `networkidle`,
    then sleeps 0.2 seconds. A failure in the network-idle wait is
    ignored. Any other failure ends the wait early and is only logged at
    debug level.
    """
    import asyncio

    page = await self._require_page()

    try:
        # DOM content first, bounded by the page stability timeout
        dom_timeout = self._page_stability_timeout
        await page.wait_for_load_state(
            "domcontentloaded", timeout=dom_timeout
        )
        logger.debug("DOM content loaded")

        # Network idle is optional: give up quietly when it times out
        try:
            idle_timeout = self._network_idle_timeout
            await page.wait_for_load_state(
                "networkidle", timeout=idle_timeout
            )
            logger.debug("Network idle achieved")
        except Exception:
            logger.debug("Network idle timeout - continuing anyway")

        # Short pause so pending JavaScript can run
        await asyncio.sleep(0.2)
        logger.debug("Page stability wait completed")

    except Exception as e:
        logger.debug(f"Page stability wait failed: {e} - continuing anyway")
566
+
567
+ async def _get_unified_analysis(
568
+ self, max_retries: int = 3, viewport_limit: Optional[bool] = None
569
+ ) -> Dict[str, Any]:
570
+ r"""Get unified analysis data from the page with retry mechanism for
571
+ navigation issues."""
572
+ page = await self._require_page()
573
+
574
+ for attempt in range(max_retries):
575
+ try:
576
+ if not self._unified_script:
577
+ logger.error("Unified analyzer script not loaded")
578
+ return {"elements": {}, "metadata": {"elementCount": 0}}
579
+
580
+ # Wait for DOM stability before each attempt (with optimized
581
+ # timeout)
582
+ try:
583
+ await page.wait_for_load_state(
584
+ 'domcontentloaded',
585
+ timeout=self._dom_content_loaded_timeout,
586
+ )
587
+ except Exception:
588
+ # Don't fail if DOM wait times out
589
+ pass
590
+
591
+ # Use instance viewport_limit if parameter not provided
592
+ use_viewport_limit = (
593
+ viewport_limit
594
+ if viewport_limit is not None
595
+ else self._viewport_limit
596
+ )
597
+ result = await page.evaluate(
598
+ self._unified_script, use_viewport_limit
599
+ )
600
+
601
+ if not isinstance(result, dict):
602
+ logger.warning(f"Invalid result type: {type(result)}")
603
+ return {"elements": {}, "metadata": {"elementCount": 0}}
604
+
605
+ # Success - return result
606
+ if attempt > 0:
607
+ logger.debug(
608
+ f"Unified analysis succeeded on attempt "
609
+ f"{attempt + 1}"
610
+ )
611
+ return result
612
+
613
+ except Exception as e:
614
+ error_msg = str(e)
615
+
616
+ # Check if this is a navigation-related error
617
+ is_navigation_error = (
618
+ "Execution context was destroyed" in error_msg
619
+ or "Most likely because of a navigation" in error_msg
620
+ or "Target page, context or browser has been closed"
621
+ in error_msg
622
+ )
623
+
624
+ if is_navigation_error and attempt < max_retries - 1:
625
+ logger.debug(
626
+ f"Navigation error in unified analysis (attempt "
627
+ f"{attempt + 1}/{max_retries}): {e}. Retrying..."
628
+ )
629
+
630
+ # Wait a bit for page stability before retrying (
631
+ # optimized)
632
+ try:
633
+ await page.wait_for_load_state(
634
+ 'domcontentloaded',
635
+ timeout=self._page_stability_timeout,
636
+ )
637
+ # Reduced delay for JS context to stabilize
638
+ import asyncio
639
+
640
+ await asyncio.sleep(0.1) # Reduced from 0.2s
641
+ except Exception:
642
+ # Continue even if wait fails
643
+ pass
644
+
645
+ continue
646
+
647
+ # Non-navigation error or final attempt - log and return
648
+ # empty result
649
+ if attempt == max_retries - 1:
650
+ logger.warning(
651
+ f"Error in unified analysis after {max_retries} "
652
+ f"attempts: {e}"
653
+ )
654
+ else:
655
+ logger.warning(
656
+ f"Non-retryable error in unified analysis: {e}"
657
+ )
658
+
659
+ return {"elements": {}, "metadata": {"elementCount": 0}}
660
+
661
+ # Should not reach here, but just in case
662
+ return {"elements": {}, "metadata": {"elementCount": 0}}
663
+
664
+ def _convert_analysis_to_rects(
665
+ self, analysis_data: Dict[str, Any]
666
+ ) -> Dict[str, Any]:
667
+ r"""Convert analysis data to rect format for visual marking."""
668
+ rects = {}
669
+ elements = analysis_data.get("elements", {})
670
+
671
+ for ref, element_data in elements.items():
672
+ coordinates = element_data.get("coordinates", [])
673
+ if coordinates:
674
+ rects[ref] = {
675
+ "role": element_data.get("role", "generic"),
676
+ "aria-name": element_data.get("name", ""),
677
+ "rects": [coordinates[0]],
678
+ }
679
+ return rects
680
+
681
def _add_set_of_mark(self, image, rects):
    r"""Draw an outline and a ref label for every rect on a copy of
    the image.

    Args:
        image: The image to annotate. It is copied, not modified.
        rects: Mapping of ref to a dict with an optional `"role"` and a
            `"rects"` list of dicts with `x`, `y`, `width`, `height`.

    Returns:
        The annotated copy, or the original `image` unchanged when PIL
        cannot be imported.
    """
    try:
        from PIL import ImageDraw, ImageFont
    except ImportError:
        logger.warning("PIL not available, returning original image")
        return image

    canvas = image.copy()
    pen = ImageDraw.Draw(canvas)

    # Prefer Arial, then PIL's default font, then no font at all
    try:
        font = ImageFont.truetype("arial.ttf", 16)
    except (OSError, IOError):
        try:
            font = ImageFont.load_default()
        except (OSError, IOError):
            font = None

    # Outline colour per element role
    palette = {
        "button": "#FF6B6B",
        "link": "#4ECDC4",
        "textbox": "#45B7D1",
        "select": "#96CEB4",
        "checkbox": "#FECA57",
        "radio": "#FF9FF3",
        "default": "#DDA0DD",
    }

    for ref, entry in rects.items():
        boxes = entry.get("rects", [])
        role = entry.get("role", "generic")
        color = palette.get(role, palette["default"])

        for box in boxes:
            left = box.get("x", 0)
            top = box.get("y", 0)
            box_width = box.get("width", 0)
            box_height = box.get("height", 0)

            # Element outline
            pen.rectangle(
                [left, top, left + box_width, top + box_height],
                outline=color,
                width=2,
            )

            # Measure the label; estimate its size when no font loaded
            label = ref
            if font:
                extent = pen.textbbox((0, 0), label, font=font)
                label_w = extent[2] - extent[0]
                label_h = extent[3] - extent[1]
            else:
                label_w = len(label) * 8
                label_h = 16

            # Place the label just above the box, clamped to the image
            tag_x = max(0, left - 2)
            tag_y = max(0, top - label_h - 2)

            # Filled tag with the ref text on top
            pen.rectangle(
                [tag_x, tag_y, tag_x + label_w + 4, tag_y + label_h + 2],
                fill=color,
            )
            pen.text((tag_x + 2, tag_y + 1), label, fill="white", font=font)

    return canvas
757
+
758
+ def _format_snapshot_from_analysis(
759
+ self, analysis_data: Dict[str, Any]
760
+ ) -> str:
761
+ r"""Format analysis data into snapshot string."""
762
+ lines = []
763
+ elements = analysis_data.get("elements", {})
764
+
765
+ for ref, element_data in elements.items():
766
+ role = element_data.get("role", "generic")
767
+ name = element_data.get("name", "")
768
+
769
+ line = f"- {role}"
770
+ if name:
771
+ line += f' "{name}"'
772
+
773
+ # Add properties
774
+ props = []
775
+ for prop in ["disabled", "checked", "expanded"]:
776
+ value = element_data.get(prop)
777
+ if value is True:
778
+ props.append(prop)
779
+ elif value is not None and prop in ["checked", "expanded"]:
780
+ props.append(f"{prop}={value}")
781
+
782
+ if props:
783
+ line += f" {' '.join(props)}"
784
+
785
+ line += f" [ref={ref}]"
786
+ lines.append(line)
787
+
788
+ return "\n".join(lines)
789
+
790
async def _get_tab_info_for_output(self) -> Dict[str, Any]:
    r"""Collect tab information to attach to action outputs.

    Returns:
        Dict[str, Any]: A dict with `"tabs"`, `"current_tab"` and
            `"total_tabs"`. If the session cannot report its tabs, a
            fallback with empty `"tabs"`, `"current_tab"` 0 and a count
            taken from the session's `_pages` (at least 1) is returned.
            If the fallback fails too, the count is 1.
    """
    try:
        # Always work on the singleton session instance
        session = await self._get_session()

        logger.debug("Attempting to get tab info from session...")
        tabs = await session.get_tab_info()
        current_index = await session.get_current_tab_id()
        tab_total = len(tabs)

        logger.debug(
            f"Successfully retrieved {tab_total} tabs, current: "
            f"{current_index}"
        )

        return {
            "tabs": tabs,
            "current_tab": current_index,
            "total_tabs": tab_total,
        }
    except Exception as e:
        logger.warning(
            f"Failed to get tab info from session: {type(e).__name__}: {e}"
        )

        # Fall back to counting the session's pages directly
        try:
            fallback_session = await self._get_session()

            has_pages_attr = hasattr(fallback_session, "_pages")
            has_main_page = (
                hasattr(fallback_session, "_page")
                and fallback_session._page is not None
            )

            # Diagnostic summary, used only for the debug log
            session_state = {
                "has_session": fallback_session is not None,
                "has_pages_attr": has_pages_attr,
                "pages_count": len(fallback_session._pages)
                if has_pages_attr
                else "unknown",
                "has_page": has_main_page,
                "session_id": getattr(
                    fallback_session, "_session_id", "unknown"
                ),
            }
            logger.debug(f"Browser session state: {session_state}")

            actual_tab_count = 0
            if has_pages_attr and fallback_session._pages:
                actual_tab_count = len(fallback_session._pages)
                # Prefer the number of pages that are still open
                try:
                    open_pages = [
                        candidate
                        for candidate in fallback_session._pages.values()
                        if not candidate.is_closed()
                    ]
                    actual_tab_count = len(open_pages)
                    logger.debug(
                        f"Found {actual_tab_count} open tabs out of "
                        f"{len(fallback_session._pages)} total"
                    )
                except Exception:
                    # Keep the count we have if page status is
                    # unavailable
                    pass

            if actual_tab_count == 0:
                # Never report zero tabs. Only the debug message differs
                # between the two cases.
                actual_tab_count = 1
                if has_main_page:
                    logger.debug(
                        "No pages in list but main page exists, "
                        "assuming 1 tab"
                    )
                else:
                    logger.debug("No pages found, defaulting to 1 tab")

            logger.debug(f"Using fallback tab count: {actual_tab_count}")
            return {
                "tabs": [],
                "current_tab": 0,
                "total_tabs": actual_tab_count,
            }

        except Exception as fallback_error:
            logger.warning(
                f"Fallback tab count also failed: "
                f"{type(fallback_error).__name__}: {fallback_error}"
            )
            return {"tabs": [], "current_tab": 0, "total_tabs": 1}
890
+
891
+ async def _exec_with_snapshot(
892
+ self,
893
+ action: Dict[str, Any],
894
+ element_details: Optional[Dict[str, Any]] = None,
895
+ ) -> Dict[str, str]:
896
+ r"""Execute action and return result with snapshot comparison."""
897
+
898
+ # Log action execution start
899
+ action_type = action.get("type", "unknown")
900
+ logger.info(f"Executing action: {action_type}")
901
+
902
+ action_start_time = time.time()
903
+ inputs: Dict[str, Any] = {"action": action}
904
+ page_load_time = None
905
+
906
+ try:
907
+ # Get before snapshot
908
+ logger.info("Capturing pre-action snapshot...")
909
+ snapshot_start_before = time.time()
910
+ before_snapshot = await self._session.get_snapshot(
911
+ force_refresh=True, diff_only=False
912
+ )
913
+ before_snapshot_time = time.time() - snapshot_start_before
914
+ logger.info(
915
+ f"Pre-action snapshot captured in "
916
+ f"{before_snapshot_time:.2f}s"
917
+ )
918
+
919
+ # Execute action
920
+ logger.info(f"Executing {action_type} action...")
921
+ exec_start = time.time()
922
+ exec_result = await self._session.exec_action(action)
923
+ exec_time = time.time() - exec_start
924
+ logger.info(f"Action {action_type} completed in {exec_time:.2f}s")
925
+
926
+ # Parse the detailed result from ActionExecutor
927
+ if isinstance(exec_result, dict):
928
+ result_message = exec_result.get("message", str(exec_result))
929
+ action_details = exec_result.get("details", {})
930
+ success = exec_result.get("success", True)
931
+ else:
932
+ result_message = str(exec_result)
933
+ action_details = {}
934
+ success = True
935
+
936
+ # Wait for page stability after action (especially important for
937
+ # click)
938
+ stability_time: float = 0.0
939
+ if action_type in ["click", "type", "select", "enter"]:
940
+ logger.info(
941
+ f"Waiting for page stability " f"after {action_type}..."
942
+ )
943
+ stability_start = time.time()
944
+ await self._wait_for_page_stability()
945
+ stability_time = time.time() - stability_start
946
+ logger.info(
947
+ f"Page stability wait "
948
+ f"completed in "
949
+ f"{stability_time:.2f}s"
950
+ )
951
+ page_load_time = stability_time
952
+
953
+ # Enhanced logging for page loading times
954
+ if self.enable_page_loading_logging and self.log_to_console:
955
+ logger.info(
956
+ f"[PAGE LOADING] Page stability for {action_type}: "
957
+ f"{round(stability_time * 1000, 2)}ms"
958
+ )
959
+
960
+ # Get after snapshot
961
+ logger.info("Capturing post-action snapshot...")
962
+ snapshot_start_after = time.time()
963
+ after_snapshot = await self._session.get_snapshot(
964
+ force_refresh=True, diff_only=False
965
+ )
966
+ after_snapshot_time = time.time() - snapshot_start_after
967
+ logger.info(
968
+ f"Post-action snapshot "
969
+ f"captured in {after_snapshot_time:.2f}s"
970
+ )
971
+
972
+ # Check for snapshot quality and log warnings
973
+ if before_snapshot == after_snapshot:
974
+ snapshot = "snapshot not changed"
975
+ logger.debug("Page snapshot unchanged after action")
976
+ else:
977
+ snapshot = after_snapshot
978
+ # Check if snapshot is empty or problematic
979
+ if "<empty>" in after_snapshot:
980
+ logger.warning(
981
+ f"Action {action_type} resulted "
982
+ f"in empty snapshot - "
983
+ f"page may still be loading"
984
+ )
985
+ elif len(after_snapshot.strip()) < 50:
986
+ logger.warning(
987
+ f"Action {action_type} resulted "
988
+ f"in very short snapshot:"
989
+ f" {len(after_snapshot)} chars"
990
+ )
991
+ else:
992
+ logger.debug(
993
+ f"Action {action_type} resulted "
994
+ f"in updated snapshot: "
995
+ f"{len(after_snapshot)} chars"
996
+ )
997
+
998
+ # Get tab information for output
999
+ tab_info = await self._get_tab_info_for_output()
1000
+
1001
+ # Create comprehensive output for logging
1002
+ execution_time = time.time() - action_start_time
1003
+ total_snapshot_time = before_snapshot_time + after_snapshot_time
1004
+ outputs = {
1005
+ "result": result_message,
1006
+ "snapshot": snapshot,
1007
+ "success": success,
1008
+ "action_details": action_details,
1009
+ "execution_stats": {
1010
+ "exec_time_ms": round(exec_time * 1000, 2),
1011
+ "stability_time_ms": round(stability_time * 1000, 2)
1012
+ if stability_time > 0
1013
+ else None,
1014
+ "snapshot_time_ms": round(total_snapshot_time * 1000, 2),
1015
+ "total_time_ms": round(execution_time * 1000, 2),
1016
+ },
1017
+ **tab_info, # Include tab information
1018
+ }
1019
+
1020
+ # If snapshot is unchanged after click, add element details to
1021
+ # log
1022
+ if (
1023
+ snapshot == "snapshot not changed"
1024
+ and action_type == "click"
1025
+ and element_details
1026
+ ):
1027
+ logger.debug(
1028
+ "Snapshot unchanged after click. "
1029
+ "Adding element details to log."
1030
+ )
1031
+ outputs["clicked_element_tag"] = element_details.get(
1032
+ "tagName", "N/A"
1033
+ )
1034
+ outputs["clicked_element_content"] = element_details.get(
1035
+ "name", ""
1036
+ )
1037
+ outputs["clicked_element_type"] = element_details.get(
1038
+ "role", "generic"
1039
+ )
1040
+
1041
+ # Log the action with all details
1042
+ await self._log_action(
1043
+ action_name=f"_exec_with_snapshot_{action_type}",
1044
+ inputs=inputs,
1045
+ outputs=outputs,
1046
+ execution_time=execution_time,
1047
+ page_load_time=page_load_time,
1048
+ )
1049
+
1050
+ return {"result": result_message, "snapshot": snapshot}
1051
+
1052
+ except Exception as e:
1053
+ execution_time = time.time() - action_start_time
1054
+ error_msg = f"{type(e).__name__}: {e!s}"
1055
+
1056
+ # Log error
1057
+ await self._log_action(
1058
+ action_name=f"_exec_with_snapshot_{action_type}",
1059
+ inputs=inputs,
1060
+ outputs=None,
1061
+ execution_time=execution_time,
1062
+ page_load_time=page_load_time,
1063
+ error=error_msg,
1064
+ )
1065
+
1066
+ raise
1067
+
1068
async def _extract_links_by_refs(
    self, snapshot: str, page, refs: List[str]
) -> List[Dict[str, str]]:
    r"""Collect link entries from a snapshot for the requested ref IDs.

    A snapshot line matching ``- link "<text>" [ref=<id>]`` produces one
    entry when ``<id>`` is contained in ``refs``. The URL of each entry is
    obtained through `_get_link_url_by_ref`.

    Args:
        snapshot (str): Textual page snapshot, split on newlines.
        page: Page object forwarded to `_get_link_url_by_ref`.
        refs (List[str]): Reference IDs of the links to extract.

    Returns:
        List[Dict[str, str]]: One dict per matching link, in snapshot
            order, with the keys "text", "ref" and "url". "url" is an
            empty string when the lookup raises or yields nothing.
    """
    import re

    link_pattern = re.compile(r'- link\s+"([^"]+)"\s+\[ref=([^\]]+)\]')
    wanted = set(refs)
    collected = []

    for raw_line in snapshot.split('\n'):
        match = link_pattern.search(raw_line)
        if match is None or match.group(2) not in wanted:
            continue

        link_text, link_ref = match.groups()
        try:
            resolved = await self._get_link_url_by_ref(page, link_ref)
        except Exception as e:
            # A failed lookup still yields an entry, just without a URL.
            logger.warning(f"Failed to get URL for ref {link_ref}: {e}")
            resolved = ""
        collected.append(
            {"text": link_text, "ref": link_ref, "url": resolved or ""}
        )

    return collected
1098
+
1099
async def _get_link_url_by_ref(self, page, ref: str) -> str:
    r"""Resolve the absolute URL of the element tagged with a given ref.

    The element is located with the selector ``[aria-ref="<ref>"]`` and
    its ``href`` attribute is joined against ``page.url``.

    Args:
        page: Page object providing `query_selector` and `url`.
        ref (str): Reference ID of the element to look up.

    Returns:
        str: The joined URL, or an empty string when the element is not
            found, has no ``href``, or any error occurs (the error is
            logged as a warning).
    """
    try:
        from urllib.parse import urljoin

        node = await page.query_selector(f'[aria-ref="{ref}"]')
        href = await node.get_attribute('href') if node else None
        return urljoin(page.url, href) if href else ""
    except Exception as e:
        logger.warning(f"Failed to get URL for ref {ref}: {e}")
        return ""
1113
+
1114
def _ensure_agent(self) -> PlaywrightLLMAgent:
    r"""Return the cached PlaywrightLLMAgent, building it on first use.

    Returns:
        PlaywrightLLMAgent: The agent stored on this toolkit.

    Raises:
        RuntimeError: If no `web_agent_model` was configured.
    """
    if self._web_agent_model is None:
        raise RuntimeError(
            "web_agent_model required for high-level task planning"
        )

    agent = self._playwright_agent
    if agent is None:
        # Lazily construct the agent and remember it for later calls.
        agent = PlaywrightLLMAgent(
            headless=self._headless,
            user_data_dir=self._user_data_dir,
            model_backend=self._web_agent_model,
        )
        self._playwright_agent = agent
    return agent
1128
+
1129
+ # Public API Methods
1130
+
1131
async def browser_open(self) -> Dict[str, Any]:
    r"""Starts a new browser session. This must be the first browser
    action.

    This method initializes the browser and navigates to the configured
    default start page. To visit a specific URL, use `visit_page` after
    this.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action.
            - "snapshot" (str): A textual snapshot of interactive
              elements.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.

    Raises:
        Exception: Any error raised while starting the browser or
            loading the start page is logged (when action or timing
            logging is enabled) and re-raised unchanged.
    """
    action_start = time.time()
    inputs: Dict[str, Any] = {}  # This tool takes no arguments

    logger.info("Starting browser session...")

    try:
        # Starting the browser is part of the guarded section so that a
        # launch failure is recorded by the error-logging path below.
        browser_start = time.time()
        await self._session.ensure_browser()
        browser_time = time.time() - browser_start
        logger.info(f"Browser session started in {browser_time:.2f}s")

        # Always use the configured default start URL
        start_url = self._default_start_url
        logger.info(f"Navigating to configured default page: {start_url}")

        result = await self.browser_visit_page(url=start_url)

        # Log success
        if self.enable_action_logging or self.enable_timing_logging:
            execution_time = time.time() - action_start
            await self._log_action(
                action_name="browser_open",
                inputs=inputs,
                outputs={
                    "result": "Browser opened and navigated to "
                    "default page."
                },
                execution_time=execution_time,
            )

        return result

    except Exception as e:
        # Log error, then let the caller see the original exception
        if self.enable_action_logging or self.enable_timing_logging:
            execution_time = time.time() - action_start
            await self._log_action(
                action_name="browser_open",
                inputs=inputs,
                outputs=None,
                execution_time=execution_time,
                error=f"{type(e).__name__}: {e!s}",
            )
        raise
1193
+
1194
@action_logger
async def browser_close(self) -> str:
    r"""Closes the browser session, releasing all resources.

    This should be called at the end of a task for cleanup. Closing the
    Playwright agent (if one was created) is best-effort: a failure is
    logged as a warning and does not prevent the session from closing.

    Returns:
        str: A confirmation message.
    """
    if self._playwright_agent is not None:
        try:
            await self._playwright_agent.close()
        except Exception as e:
            # Keep going so the session below is still closed, but do
            # not hide the failure from the logs.
            logger.warning(f"Failed to close Playwright agent: {e}")
        self._playwright_agent = None

    await self._session.close()
    return "Browser session closed."
1212
+
1213
@action_logger
async def browser_visit_page(self, url: str) -> Dict[str, Any]:
    r"""Opens a URL in a browser tab and switches to it.

    A new tab is created for the URL, except when the browser reports
    exactly one tab whose URL is "about:blank"; that tab is then reused.
    A URL without a scheme is prefixed with "https://".

    Args:
        url (str): The web address to load. This should be a valid and
            existing URL.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action.
            - "snapshot" (str): A textual snapshot of the new page.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the new active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    if isinstance(url, str):
        # Surrounding whitespace is never part of an address and would
        # otherwise let a blank string pass the emptiness check.
        url = url.strip()

    if not url or not isinstance(url, str):
        return {
            "result": "Error: 'url' must be a non-empty string",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 1,
        }

    # "about:" addresses (e.g. "about:blank") carry a scheme without
    # "://"; prefixing them would produce "https://about:...".
    if '://' not in url and not url.lower().startswith('about:'):
        url = f'https://{url}'

    await self._ensure_browser()
    session = await self._get_session()
    nav_result = ""

    # By default, we want to create a new tab.
    should_create_new_tab = True
    try:
        # If the browser has just started with a single "about:blank"
        # tab, use that tab instead of creating a new one.
        tab_info_data = await self._get_tab_info_for_output()
        tabs = tab_info_data.get("tabs", [])
        if len(tabs) == 1 and tabs[0].get("url") == "about:blank":
            logger.info(
                "Found single blank tab, navigating in current tab "
                "instead of creating a new one."
            )
            should_create_new_tab = False
    except Exception as e:
        logger.warning(
            "Could not get tab info to check for blank tab, "
            f"proceeding with default behavior (new tab). Error: {e}"
        )

    if should_create_new_tab:
        logger.info(f"Creating new tab and navigating to URL: {url}")
        try:
            new_tab_id = await session.create_new_tab(url)
            await session.switch_to_tab(new_tab_id)
            nav_result = f"Visited {url} in new tab {new_tab_id}"
        except Exception as e:
            # Reported through "result" rather than raised.
            logger.error(f"Failed to create new tab and navigate: {e}")
            nav_result = f"Error creating new tab: {e}"
    else:
        logger.info(f"Navigating to URL in current tab: {url}")
        nav_result = await session.visit(url)

    # Get snapshot; a failure leaves the snapshot empty
    snapshot = ""
    try:
        snapshot = await session.get_snapshot(
            force_refresh=True, diff_only=False
        )
    except Exception as e:
        logger.warning(f"Failed to capture snapshot: {e}")

    # Get tab information
    tab_info = await self._get_tab_info_for_output()

    return {"result": nav_result, "snapshot": snapshot, **tab_info}
1291
+
1292
@action_logger
async def browser_back(self) -> Dict[str, Any]:
    r"""Navigates the active tab one step back in its history.

    This mirrors pressing the browser's "back" button. If the navigation
    fails, the failure is reported in "result" and a snapshot of the
    current page is still returned.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action.
            - "snapshot" (str): A textual snapshot of the previous page.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    import asyncio

    page = await self._require_page()

    try:
        logger.info("Navigating back in browser history...")
        started = time.time()
        await page.go_back(
            wait_until="domcontentloaded", timeout=self._navigation_timeout
        )
        elapsed = time.time() - started
        logger.info(f"Back navigation completed in {elapsed:.2f}s")

        # Short fixed pause before reading the page state
        await asyncio.sleep(0.2)

        logger.info("Capturing page snapshot after back navigation...")
        started = time.time()
        page_snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        elapsed = time.time() - started
        logger.info(f"Back navigation snapshot captured in {elapsed:.2f}s")

        tabs_state = await self._get_tab_info_for_output()
        return {
            "result": "Back navigation successful.",
            "snapshot": page_snapshot,
            **tabs_state,
        }

    except Exception as e:
        logger.warning(f"Back navigation failed: {e}")
        # Report the page as it currently is, even though we did not move
        page_snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        tabs_state = await self._get_tab_info_for_output()
        return {
            "result": f"Back navigation failed: {e!s}",
            "snapshot": page_snapshot,
            **tabs_state,
        }
1356
+
1357
@action_logger
async def browser_forward(self) -> Dict[str, Any]:
    r"""Navigates the active tab one step forward in its history.

    This mirrors pressing the browser's "forward" button. If the
    navigation fails, the failure is reported in "result" and a snapshot
    of the current page is still returned.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action.
            - "snapshot" (str): A textual snapshot of the next page.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    import asyncio

    page = await self._require_page()

    try:
        logger.info("Navigating forward in browser history...")
        started = time.time()
        await page.go_forward(
            wait_until="domcontentloaded", timeout=self._navigation_timeout
        )
        elapsed = time.time() - started
        logger.info(f"Forward navigation completed in {elapsed:.2f}s")

        # Short fixed pause before reading the page state
        await asyncio.sleep(0.2)

        logger.info("Capturing page snapshot after forward navigation...")
        started = time.time()
        page_snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        elapsed = time.time() - started
        logger.info(
            f"Forward navigation snapshot captured in {elapsed:.2f}s"
        )

        tabs_state = await self._get_tab_info_for_output()
        return {
            "result": "Forward navigation successful.",
            "snapshot": page_snapshot,
            **tabs_state,
        }

    except Exception as e:
        logger.warning(f"Forward navigation failed: {e}")
        # Report the page as it currently is, even though we did not move
        page_snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        tabs_state = await self._get_tab_info_for_output()
        return {
            "result": f"Forward navigation failed: {e!s}",
            "snapshot": page_snapshot,
            **tabs_state,
        }
1422
+
1423
@action_logger
async def browser_get_page_snapshot(self) -> str:
    r"""Gets a textual snapshot of the page's interactive elements.

    The snapshot lists elements like buttons, links, and inputs, each
    with a unique `ref` ID. Other tools (e.g., `click`, `type`) use that
    ID to interact with a specific element. This tool provides no visual
    information.

    Returns:
        str: A formatted string representing the interactive elements and
            their `ref` IDs. For example:
            '- link "Sign In" [ref=1]'
            '- textbox "Username" [ref=2]'
    """
    logger.info("Capturing page snapshot")

    started = time.time()
    analysis = await self._get_unified_analysis()
    elapsed = time.time() - started
    logger.info(f"Page snapshot analysis completed in {elapsed:.2f}s")

    # Use the analysis' own text when present; otherwise format one
    # from the analysis data.
    text = analysis.get("snapshotText", "")
    if text:
        return text
    return self._format_snapshot_from_analysis(analysis)
1454
+
1455
@dependencies_required('PIL')
@action_logger
async def browser_get_som_screenshot(
    self,
    read_image: bool = True,
    instruction: Optional[str] = None,
):
    r"""Captures a screenshot with interactive elements highlighted.

    "SoM" stands for "Set of Marks". This tool takes a screenshot and
    draws boxes around clickable elements, overlaying a `ref` ID on each.
    Use this for a visual understanding of the page, especially when the
    textual snapshot is not enough.

    Args:
        read_image (bool, optional): If `True`, the agent will analyze
            the screenshot. Requires agent to be registered.
            (default: :obj:`True`)
        instruction (Optional[str], optional): A specific question or
            command for the agent regarding the screenshot, used only if
            `read_image` is `True`. For example: "Find the login button."

    Returns:
        str: A summary message including the file path of the saved
            screenshot, e.g., "Visual webpage screenshot captured with 42
            interactive elements and saved to /path/to/screenshot.png",
            and optionally the agent's analysis if `read_image` is
            `True`. Analysis problems are appended to the message
            instead of being raised.
    """
    from PIL import Image

    os.makedirs(self._cache_dir, exist_ok=True)
    page = await self._require_page()

    logger.info(
        f"Starting screenshot capture "
        f"with timeout: {self._screenshot_timeout}ms"
    )

    start_time = time.time()
    image_data = await page.screenshot(timeout=self._screenshot_timeout)
    screenshot_time = time.time() - start_time

    logger.info(f"Screenshot capture completed in {screenshot_time:.2f}s")
    image = Image.open(io.BytesIO(image_data))

    logger.info("Starting unified page analysis...")
    analysis_start_time = time.time()
    analysis_data = await self._get_unified_analysis()
    analysis_time = time.time() - analysis_start_time
    logger.info(f"Unified page analysis completed in {analysis_time:.2f}s")

    logger.info("Processing visual marks on screenshot...")
    mark_start_time = time.time()
    rects = self._convert_analysis_to_rects(analysis_data)
    marked_image = self._add_set_of_mark(image, rects)
    mark_time = time.time() - mark_start_time
    logger.info(f"Visual marks processing completed in {mark_time:.2f}s")

    # Save screenshot to cache directory; the name is derived from the
    # URL path plus a timestamp.
    parsed_url = urllib.parse.urlparse(page.url)
    url_name = sanitize_filename(str(parsed_url.path), max_length=241)
    timestamp = datetime.datetime.now().strftime("%m%d%H%M%S")
    file_path = os.path.join(
        self._cache_dir, f"{url_name}_{timestamp}_som.png"
    )
    marked_image.save(file_path, "PNG")

    # The saved path is part of the documented return message.
    text_result = (
        f"Visual webpage screenshot captured with {len(rects)} "
        f"interactive elements and saved to {file_path}."
    )

    if not read_image:
        return text_result

    if self.agent is None:
        logger.error(
            "Cannot analyze screenshot: No agent registered. "
            "Please pass this toolkit to ChatAgent via "
            "toolkits_to_register_agent parameter."
        )
        text_result += (
            " Error: No agent registered for image analysis. "
            "Please pass this toolkit to ChatAgent via "
            "toolkits_to_register_agent parameter."
        )
        return text_result

    try:
        # Load the image and create a message
        from camel.messages import BaseMessage

        img = Image.open(file_path)
        inst = instruction if instruction is not None else ""
        message = BaseMessage.make_user_message(
            role_name="User",
            content=inst,
            image_list=[img],
        )

        # Get the agent's analysis and hand it back to the caller.
        response = await self.agent.astep(message)
        # NOTE(review): assumes the response exposes a `msgs` list whose
        # items have `.content` - confirm against ChatAgentResponse.
        msgs = getattr(response, "msgs", None)
        if msgs:
            text_result += f" Agent analysis: {msgs[0].content}"
    except Exception as e:
        logger.error(f"Error analyzing screenshot: {e}")
        text_result += f" Error analyzing screenshot: {e}"

    return text_result
1566
+
1567
async def browser_click(self, *, ref: str) -> Dict[str, Any]:
    r"""Performs a click on an element on the page.

    Args:
        ref (str): The `ref` ID of the element to click. This ID is
            obtained from a page snapshot (`get_page_snapshot` or
            `get_som_screenshot`).

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message when `ref` is not present in the page analysis.
            - "snapshot" (str): A textual snapshot of the page after the
              click.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    self._validate_ref(ref, "click")

    page_analysis = await self._get_unified_analysis()
    known_elements = page_analysis.get("elements", {})

    if ref in known_elements:
        outcome = await self._exec_with_snapshot(
            {"type": "click", "ref": ref},
            element_details=known_elements.get(ref),
        )
        # Add tab information to the result
        outcome.update(await self._get_tab_info_for_output())
        return outcome

    # Unknown ref: return the current snapshot to give more context
    logger.error(f"Error: Element reference '{ref}' not found. ")
    current_snapshot = self._format_snapshot_from_analysis(page_analysis)
    tabs_state = await self._get_tab_info_for_output()
    return {
        "result": f"Error: Element reference '{ref}' not found. ",
        "snapshot": current_snapshot,
        **tabs_state,
    }
1610
+
1611
async def browser_type(self, *, ref: str, text: str) -> Dict[str, Any]:
    r"""Types text into an input element on the page.

    Args:
        ref (str): The `ref` ID of the input element, from a snapshot.
        text (str): The text to type into the element.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action.
            - "snapshot" (str): A textual snapshot of the page after
              typing.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    self._validate_ref(ref, "type")
    # Result is unused; the call is made to ensure aria-ref attributes.
    await self._get_unified_analysis()

    outcome = await self._exec_with_snapshot(
        {"type": "type", "ref": ref, "text": text}
    )
    # Add tab information to the result
    outcome.update(await self._get_tab_info_for_output())
    return outcome
1638
+
1639
async def browser_select(self, *, ref: str, value: str) -> Dict[str, Any]:
    r"""Selects an option in a dropdown (`<select>`) element.

    Args:
        ref (str): The `ref` ID of the `<select>` element.
        value (str): The `value` attribute of the `<option>` to select,
            not its visible text.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action.
            - "snapshot" (str): A snapshot of the page after the
              selection.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    self._validate_ref(ref, "select")
    # Result is unused; the analysis is refreshed before acting.
    await self._get_unified_analysis()

    outcome = await self._exec_with_snapshot(
        {"type": "select", "ref": ref, "value": value}
    )
    # Add tab information to the result
    outcome.update(await self._get_tab_info_for_output())
    return outcome
1667
+
1668
async def browser_scroll(
    self, *, direction: str, amount: int
) -> Dict[str, Any]:
    r"""Scrolls the current page window.

    Args:
        direction (str): The direction to scroll: 'up' or 'down'.
        amount (int): The number of pixels to scroll.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message for an unsupported direction.
            - "snapshot" (str): A snapshot of the page after scrolling
              (empty on a direction error).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    if direction in ("up", "down"):
        outcome = await self._exec_with_snapshot(
            {"type": "scroll", "direction": direction, "amount": amount}
        )
        # Add tab information to the result
        outcome.update(await self._get_tab_info_for_output())
        return outcome

    # Unsupported direction: report it without touching the page
    tabs_state = await self._get_tab_info_for_output()
    return {
        "result": "Error: direction must be 'up' or 'down'",
        "snapshot": "",
        **tabs_state,
    }
1701
+
1702
async def browser_enter(self) -> Dict[str, Any]:
    r"""Simulates pressing the Enter key on the currently focused
    element.

    This is useful for submitting forms or search queries after using the
    `type` tool.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action.
            - "snapshot" (str): A new page snapshot, as this action often
              triggers navigation.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    # No target is passed: Enter goes to whatever element has focus
    outcome = await self._exec_with_snapshot({"type": "enter"})

    # Add tab information to the result
    outcome.update(await self._get_tab_info_for_output())
    return outcome
1728
+
1729
@action_logger
async def browser_mouse_control(
    self, *, control: str, x: float, y: float
) -> Dict[str, Any]:
    r"""Control the mouse to interact with browser with x, y coordinates

    Args:
        control (str): The action to perform: 'click', 'right_click'
            or 'dblclick'.
        x (float): x-coordinate for the control action.
        y (float): y-coordinate for the control action.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message for an unsupported `control` value.
            - "snapshot" (str): A new page snapshot (empty on a
              `control` error).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    if control not in ("click", "right_click", "dblclick"):
        tab_info = await self._get_tab_info_for_output()
        return {
            # List every accepted value so the caller can self-correct.
            "result": "Error: supported control actions are "
            "'click', 'right_click' or 'dblclick'",
            "snapshot": "",
            **tab_info,
        }

    action = {"type": "mouse_control", "control": control, "x": x, "y": y}

    result = await self._exec_with_snapshot(action)

    # Add tab information to the result
    tab_info = await self._get_tab_info_for_output()
    result.update(tab_info)

    return result
1767
+
1768
@action_logger
async def browser_mouse_drag(
    self, *, from_ref: str, to_ref: str
) -> Dict[str, Any]:
    r"""Control the mouse to drag and drop in the browser using ref IDs.

    Args:
        from_ref (str): The `ref` ID of the source element to drag from.
        to_ref (str): The `ref` ID of the target element to drag to.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message when either ref is missing from the page analysis.
            - "snapshot" (str): A new page snapshot.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    self._validate_ref(from_ref, "drag source")
    self._validate_ref(to_ref, "drag target")

    page_analysis = await self._get_unified_analysis()
    known_elements = page_analysis.get("elements", {})

    # The source is checked before the target; the first missing ref
    # ends the call with an error result.
    for role, candidate in (("Source", from_ref), ("Target", to_ref)):
        if candidate in known_elements:
            continue
        failure = f"Error: {role} element reference '{candidate}' not found."
        logger.error(failure)
        current_snapshot = self._format_snapshot_from_analysis(page_analysis)
        tabs_state = await self._get_tab_info_for_output()
        return {
            "result": failure,
            "snapshot": current_snapshot,
            **tabs_state,
        }

    outcome = await self._exec_with_snapshot(
        {
            "type": "mouse_drag",
            "from_ref": from_ref,
            "to_ref": to_ref,
        }
    )

    # Add tab information to the result
    outcome.update(await self._get_tab_info_for_output())
    return outcome
1835
+
1836
@action_logger
async def browser_press_key(self, *, keys: List[str]) -> Dict[str, Any]:
    r"""Press key and key combinations.
    Supports single key press or combination of keys by concatenating
    them with '+' separator.

    Args:
        keys (List[str]): key or list of keys.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message when `keys` is not a list of strings.
            - "snapshot" (str): A new page snapshot (empty on a `keys`
              error).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    keys_ok = isinstance(keys, list) and all(
        isinstance(entry, str) for entry in keys
    )
    if not keys_ok:
        tabs_state = await self._get_tab_info_for_output()
        return {
            "result": "Error: Expected keys as a list of strings.",
            "snapshot": "",
            **tabs_state,
        }

    outcome = await self._exec_with_snapshot(
        {"type": "press_key", "keys": keys}
    )

    # Add tab information to the result
    outcome.update(await self._get_tab_info_for_output())
    return outcome
1871
+
1872
@action_logger
async def browser_wait_user(
    self, timeout_sec: Optional[float] = None
) -> Dict[str, Any]:
    r"""Pause until a human presses Enter in the console.

    Intended for steps that need manual intervention, such as solving a
    CAPTCHA. The blocking ``input`` call runs in a worker thread via
    ``asyncio.to_thread``.

    Args:
        timeout_sec (Optional[float]): Max time to wait in seconds. If
            `None`, it will wait indefinitely.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): A message indicating how the wait ended.
            - "snapshot" (str): The page snapshot after the wait.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    import asyncio

    prompt = (
        "🕑 Agent waiting for human input. "
        "Complete action in browser, then press Enter..."
    )
    logger.info(f"\n{prompt}\n")

    bounded = timeout_sec is not None

    try:
        if bounded:
            logger.info(
                f"Waiting for user input with timeout: {timeout_sec}s"
            )
        else:
            logger.info("Waiting for user input (no timeout)")

        began = time.time()
        enter_pressed = asyncio.to_thread(
            input, ">>> Press Enter to resume <<<\n"
        )
        if bounded:
            await asyncio.wait_for(enter_pressed, timeout=timeout_sec)
        else:
            await enter_pressed
        elapsed = time.time() - began

        logger.info(f"User input received after {elapsed:.2f}s")
        result_msg = "User resumed."
    except asyncio.TimeoutError:
        # Only the bounded wait can end up here; report the configured
        # limit rather than a measured duration.
        waited = timeout_sec or 0.0
        logger.info(
            f"User input timeout reached after {waited}s, auto-resuming"
        )
        result_msg = f"Timeout {timeout_sec}s reached, auto-resumed."

    page_snapshot = await self._session.get_snapshot(
        force_refresh=True, diff_only=False
    )
    tabs = await self._get_tab_info_for_output()

    return {"result": result_msg, "snapshot": page_snapshot, **tabs}
1936
+
1937
@action_logger
async def browser_get_page_links(
    self, *, ref: List[str]
) -> Dict[str, Any]:
    r"""Look up the destination URLs for a list of link elements.

    Useful for learning where a link leads before clicking it.

    Args:
        ref (List[str]): A list of `ref` IDs for link elements, obtained
            from a page snapshot.

    Returns:
        Dict[str, Any]: A dictionary containing:
            - "links" (List[Dict]): A list of found links, where each
              link has "text", "ref", and "url" keys. The list is empty
              when `ref` is empty, is not a list, or contains an entry
              that is empty or not a string.
    """
    no_links: Dict[str, Any] = {"links": []}

    if not (ref and isinstance(ref, list)):
        return no_links

    # A single bad entry invalidates the whole request.
    if not all(item and isinstance(item, str) for item in ref):
        return no_links

    current_page = await self._require_page()
    fresh_snapshot = await self._session.get_snapshot(
        force_refresh=True, diff_only=False
    )
    found = await self._extract_links_by_refs(
        fresh_snapshot, current_page, ref
    )
    return {"links": found}
1968
+
1969
@action_logger
async def browser_solve_task(
    self, task_prompt: str, start_url: str, max_steps: int = 15
) -> str:
    r"""Hand a high-level, multi-step task over to the web agent.

    Suited to tasks that can be described in a single prompt (e.g., "log
    into my account and check for new messages"). The agent obtained from
    `_ensure_agent` first navigates to `start_url` and then processes
    `task_prompt`.

    NOTE: This is a high-level action; for simple interactions, use tools
    like `click` and `type`. `web_agent_model` must be provided during
    toolkit initialization.

    Args:
        task_prompt (str): A natural language description of the task.
        start_url (str): The URL to start the task from. This should be a
            valid and existing URL, as agents may generate non-existent
            ones.
        max_steps (int): The maximum number of steps the agent can take.

    Returns:
        str: A fixed summary message indicating the task has finished.
    """
    web_agent = self._ensure_agent()

    # Navigation must complete before the agent starts on the prompt.
    await web_agent.navigate(start_url)
    await web_agent.process_command(task_prompt, max_steps=max_steps)

    return "Task processing finished - see stdout for detailed trace."
1998
+
1999
@action_logger
async def browser_console_view(self) -> Dict[str, Any]:
    r"""Return the console logs collected for the current session.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "console_messages" (List[Dict]): Logs from the browser
              console; empty when the logs could not be retrieved.
    """
    try:
        collected = await self._session.get_console_logs()
        # Copy into a plain list so the output is JSON serializable.
        messages = list(collected)
        return {"console_messages": messages}
    except Exception as e:
        # Best effort: a retrieval failure is logged, not raised.
        logger.warning(f"Failed to retrieve logs: {e}")
        return {"console_messages": []}
2015
+
2016
async def browser_console_exec(self, code: str) -> Dict[str, Any]:
    r"""Execute javascript code in the console of the current page and get
    results.

    The code is wrapped in a JavaScript function that temporarily replaces
    ``console.log`` to capture its output. The code is first evaluated with
    ``eval`` (expression semantics, like a browser console) and, if that
    throws, executed as the body of a function (statement semantics).

    Args:
        code (str): JavaScript code for execution.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Result of the action, or a failure message
              when execution raised.
            - "console_output" (List[str]): Console log outputs during
              execution (empty on failure).
            - "snapshot" (str): A new page snapshot (empty on failure).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    import asyncio
    import json

    page = await self._require_page()

    try:
        logger.info("Executing JavaScript code in browser console.")
        exec_start = time.time()

        # json.dumps produces a valid JavaScript string literal for any
        # input. Python's repr() does not: escapes such as \U0001F600 are
        # not JavaScript escapes.
        code_literal = json.dumps(code)

        # NOTE(review): the catch around eval() handles every error, not
        # only syntax errors, so code that throws at runtime is executed a
        # second time by the statement fallback.
        #
        # The raw code is spliced in on its own lines so that a trailing
        # `//` comment in `code` cannot comment out the closing `})();`
        # and make the whole wrapper unparsable.
        wrapped_code = (
            """
            (function() {
                const _logs = [];
                const originalLog = console.log;
                console.log = function(...args) {
                    _logs.push(args.map(arg => {
                        try {
                            return typeof arg === 'object' ?
                                JSON.stringify(arg) : String(arg);
                        } catch (e) {
                            return String(arg);
                        }
                    }).join(' '));
                    originalLog.apply(console, args);
                };

                let result;
                try {
                    // First try to evaluate as an expression
                    // (like browser console)
                    result = eval("""
            + code_literal
            + """);
                } catch (e) {
                    // If that fails, execute as statements
                    try {
                        result = (function() {
"""
            + code
            + """
                        })();
                    } catch (error) {
                        console.log = originalLog;
                        throw error;
                    }
                }

                console.log = originalLog;
                return { result, logs: _logs };
            })()
            """
        )

        eval_result = await page.evaluate(wrapped_code)
        result = eval_result.get('result')
        console_logs = eval_result.get('logs', [])

        exec_time = time.time() - exec_start
        logger.info(f"Code execution completed in {exec_time:.2f}s.")

        # Short pause before taking the snapshot.
        await asyncio.sleep(0.2)

        # Get snapshot
        logger.info("Capturing page snapshot after code execution.")
        snapshot_start = time.time()
        snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        snapshot_time = time.time() - snapshot_start
        logger.info(
            f"Code execution snapshot captured in {snapshot_time:.2f}s"
        )

        # Get tab information
        tab_info = await self._get_tab_info_for_output()

        # Properly serialize the result; fall back to str() for values
        # json cannot encode.
        try:
            result_str = json.dumps(result, indent=2)
        except (TypeError, ValueError):
            result_str = str(result)

        return {
            "result": f"Code execution result: {result_str}",
            "console_output": console_logs,
            "snapshot": snapshot,
            **tab_info,
        }

    except Exception as e:
        logger.warning(f"Code execution failed: {e}")
        # Get tab information for error case; if even that fails, report
        # an empty tab overview instead of raising.
        try:
            tab_info = await self._get_tab_info_for_output()
        except Exception:
            tab_info = {
                "tabs": [],
                "current_tab": 0,
                "total_tabs": 0,
            }

        return {
            "result": f"Code execution failed: {e}",
            "console_output": [],
            "snapshot": "",
            **tab_info,
        }
2140
+
2141
def get_log_summary(self) -> Dict[str, Any]:
    r"""Get a summary of the actions recorded in ``self.log_buffer``.

    Returns:
        Dict[str, Any]: When the buffer is empty,
            ``{"total_actions": 0, "summary": "No actions logged"}``.
            Otherwise a dictionary with:
            - "total_actions" (int): Number of logged entries.
            - "total_execution_time_ms": Sum of the entries'
              "execution_time_ms" values (missing counts as 0), rounded
              to 2 decimals.
            - "total_page_load_time_ms": Sum of the entries'
              "page_load_time_ms" values (missing counts as 0), rounded
              to 2 decimals.
            - "action_counts" (Dict[str, int]): Occurrences per "action"
              value.
            - "error_count" (int): Entries containing an "error" key.
            - "success_rate" (float): Percentage of entries without an
              "error" key, rounded to 2 decimals.

    Raises:
        KeyError: If a logged entry has no "action" key.
    """
    entries = self.log_buffer
    if not entries:
        return {"total_actions": 0, "summary": "No actions logged"}

    total_actions = len(entries)
    total_execution_time = 0
    total_page_load_time = 0
    error_count = 0
    action_counts: Dict[str, int] = {}

    # One pass gathers every aggregate.
    for entry in entries:
        total_execution_time += entry.get("execution_time_ms", 0)
        total_page_load_time += entry.get("page_load_time_ms", 0)

        action = entry["action"]
        action_counts[action] = action_counts.get(action, 0) + 1

        if "error" in entry:
            error_count += 1

    # total_actions is at least 1 here, so the division is always safe.
    success_rate = round(
        (total_actions - error_count) / total_actions * 100, 2
    )

    return {
        "total_actions": total_actions,
        "total_execution_time_ms": round(total_execution_time, 2),
        "total_page_load_time_ms": round(total_page_load_time, 2),
        "action_counts": action_counts,
        "error_count": error_count,
        "success_rate": success_rate,
    }
2177
+
2178
def clear_logs(self) -> None:
    r"""Empty ``self.log_buffer`` in place and log that it was cleared."""
    buffer = self.log_buffer
    buffer.clear()
    logger.info("Log buffer cleared")
2182
+
2183
def clone_for_new_session(
    self, new_session_id: Optional[str] = None
) -> "HybridBrowserToolkit":
    r"""Build a new HybridBrowserToolkit that reuses this configuration
    under a different session.

    The clone gets its own cache directory, derived from this toolkit's
    cache directory with a ``_clone_<session id>/`` suffix, and a copy of
    the enabled tool list.

    Args:
        new_session_id: Optional new session ID. If None, the first 8
            characters of a random UUID are used.

    Returns:
        A new HybridBrowserToolkit instance with the same configuration
        but a different session.
    """
    import uuid

    if new_session_id is None:
        # The first 8 hex digits of a UUID4 serve as a short session ID.
        new_session_id = uuid.uuid4().hex[:8]

    base_cache_dir = self._cache_dir.rstrip('/')
    clone_cache_dir = f"{base_cache_dir}_clone_{new_session_id}/"

    return HybridBrowserToolkit(
        headless=self._headless,
        user_data_dir=self._user_data_dir,
        stealth=self._stealth,
        web_agent_model=self._web_agent_model,
        cache_dir=clone_cache_dir,
        enabled_tools=self.enabled_tools.copy(),
        browser_log_to_file=self._browser_log_to_file,
        session_id=new_session_id,
        default_start_url=self._default_start_url,
        default_timeout=self._default_timeout,
        short_timeout=self._short_timeout,
        navigation_timeout=self._navigation_timeout,
        network_idle_timeout=self._network_idle_timeout,
        screenshot_timeout=self._screenshot_timeout,
        page_stability_timeout=self._page_stability_timeout,
        dom_content_loaded_timeout=self._dom_content_loaded_timeout,
    )
2221
+
2222
@action_logger
async def browser_switch_tab(self, *, tab_id: str) -> Dict[str, Any]:
    r"""Make the tab with the given ID the active tab.

    Use `get_tab_info` to find the ID of the tab you want to switch to.

    Args:
        tab_id (str): The ID of the tab to activate.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or a failure
              message when the switch did not succeed.
            - "snapshot" (str): A snapshot of the newly active tab
              (empty when the switch failed).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the new active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    await self._ensure_browser()
    session = await self._get_session()

    if await session.switch_to_tab(tab_id):
        page_snapshot = await session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        message = f"Successfully switched to tab {tab_id}"
    else:
        # No snapshot is taken when the switch failed.
        page_snapshot = ""
        message = f"Failed to switch to tab {tab_id}. Tab may not exist."

    tabs = await self._get_tab_info_for_output()
    return {"result": message, "snapshot": page_snapshot, **tabs}
2266
+
2267
@action_logger
async def browser_close_tab(self, *, tab_id: str) -> Dict[str, Any]:
    r"""Close the tab with the given ID.

    Use `get_tab_info` to find the ID of the tab to close.

    Args:
        tab_id (str): The ID of the tab to close.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or a failure
              message when the tab could not be closed.
            - "snapshot" (str): A snapshot of the active tab after
              closure (empty when the close failed or no snapshot could
              be taken).
            - "tabs" (List[Dict]): Information about remaining tabs.
            - "current_tab" (int): Index of the new active tab.
            - "total_tabs" (int): Total number of remaining tabs.
    """
    await self._ensure_browser()
    session = await self._get_session()

    page_snapshot = ""
    if await session.close_tab(tab_id):
        # Capture the state after closing; the snapshot stays empty if
        # it cannot be taken (e.g. no active tab is left).
        try:
            page_snapshot = await session.get_snapshot(
                force_refresh=True, diff_only=False
            )
        except Exception:
            page_snapshot = ""
        message = f"Successfully closed tab {tab_id}"
    else:
        message = f"Failed to close tab {tab_id}. Tab may not exist."

    tabs = await self._get_tab_info_for_output()
    return {"result": message, "snapshot": page_snapshot, **tabs}
2317
+
2318
@action_logger
async def browser_get_tab_info(self) -> Dict[str, Any]:
    r"""Report all open browser tabs and which one is active.

    Ensures the browser is available, then returns the output of
    `_get_tab_info_for_output` unchanged.

    Returns:
        Dict[str, Any]: A dictionary with tab information:
            - "tabs" (List[Dict]): A list of open tabs, each with:
                - "index" (int): The tab's zero-based index.
                - "title" (str): The page title.
                - "url" (str): The current URL.
                - "is_current" (bool): True if the tab is active.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    await self._ensure_browser()
    tab_overview = await self._get_tab_info_for_output()
    return tab_overview
2337
+
2338
def get_tools(self) -> List[FunctionTool]:
    r"""Build the function tools selected by ``self.enabled_tools``.

    Each enabled name is resolved to the matching toolkit method and
    wrapped in a `FunctionTool`. Unknown names are skipped with a warning,
    and so is "browser_solve_task" when no `web_agent_model` was provided.

    Returns:
        List[FunctionTool]: The wrapped tools, in the order of
            ``self.enabled_tools``.
    """
    # Registry of every tool name this toolkit can expose.
    methods_by_name = {
        "browser_open": self.browser_open,
        "browser_close": self.browser_close,
        "browser_visit_page": self.browser_visit_page,
        "browser_back": self.browser_back,
        "browser_forward": self.browser_forward,
        "browser_get_page_snapshot": self.browser_get_page_snapshot,
        "browser_get_som_screenshot": self.browser_get_som_screenshot,
        "browser_get_page_links": self.browser_get_page_links,
        "browser_click": self.browser_click,
        "browser_type": self.browser_type,
        "browser_select": self.browser_select,
        "browser_scroll": self.browser_scroll,
        "browser_enter": self.browser_enter,
        "browser_mouse_control": self.browser_mouse_control,
        "browser_mouse_drag": self.browser_mouse_drag,
        "browser_press_key": self.browser_press_key,
        "browser_wait_user": self.browser_wait_user,
        "browser_solve_task": self.browser_solve_task,
        "browser_switch_tab": self.browser_switch_tab,
        "browser_close_tab": self.browser_close_tab,
        "browser_get_tab_info": self.browser_get_tab_info,
        "browser_console_view": self.browser_console_view,
        "browser_console_exec": self.browser_console_exec,
    }

    selected: List[FunctionTool] = []

    for tool_name in self.enabled_tools:
        # The task solver is unusable without a model to drive it.
        lacks_model = (
            tool_name == "browser_solve_task"
            and self._web_agent_model is None
        )
        if lacks_model:
            logger.warning(
                f"Tool '{tool_name}' is enabled but web_agent_model "
                "is not provided. Skipping this tool."
            )
            continue

        method = methods_by_name.get(tool_name)
        if method is None:
            logger.warning(f"Unknown tool name: {tool_name}")
            continue

        selected.append(FunctionTool(cast(Callable[..., Any], method)))

    logger.info(f"Returning {len(selected)} enabled tools")
    return selected