camel-ai 0.2.59__py3-none-any.whl → 0.2.82__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (506) hide show
  1. camel/__init__.py +3 -3
  2. camel/agents/__init__.py +2 -2
  3. camel/agents/_types.py +9 -4
  4. camel/agents/_utils.py +40 -2
  5. camel/agents/base.py +2 -2
  6. camel/agents/chat_agent.py +5012 -902
  7. camel/agents/critic_agent.py +2 -2
  8. camel/agents/deductive_reasoner_agent.py +56 -56
  9. camel/agents/embodied_agent.py +2 -2
  10. camel/agents/knowledge_graph_agent.py +20 -20
  11. camel/agents/mcp_agent.py +39 -36
  12. camel/agents/multi_hop_generator_agent.py +3 -3
  13. camel/agents/programmed_agent_instruction.py +2 -2
  14. camel/agents/repo_agent.py +4 -3
  15. camel/agents/role_assignment_agent.py +2 -2
  16. camel/agents/search_agent.py +2 -2
  17. camel/agents/task_agent.py +2 -2
  18. camel/agents/tool_agents/__init__.py +2 -2
  19. camel/agents/tool_agents/base.py +2 -2
  20. camel/agents/tool_agents/hugging_face_tool_agent.py +3 -3
  21. camel/benchmarks/__init__.py +2 -2
  22. camel/benchmarks/apibank.py +5 -5
  23. camel/benchmarks/apibench.py +2 -2
  24. camel/benchmarks/base.py +2 -2
  25. camel/benchmarks/browsecomp.py +44 -33
  26. camel/benchmarks/gaia.py +17 -13
  27. camel/benchmarks/mock_website/README.md +94 -0
  28. camel/benchmarks/mock_website/mock_web.py +299 -0
  29. camel/benchmarks/mock_website/requirements.txt +3 -0
  30. camel/benchmarks/mock_website/shopping_mall/app.py +465 -0
  31. camel/benchmarks/mock_website/task.json +104 -0
  32. camel/benchmarks/nexus.py +3 -3
  33. camel/benchmarks/ragbench.py +2 -2
  34. camel/bots/__init__.py +2 -2
  35. camel/bots/discord/__init__.py +2 -2
  36. camel/bots/discord/discord_app.py +2 -2
  37. camel/bots/discord/discord_installation.py +2 -2
  38. camel/bots/discord/discord_store.py +3 -3
  39. camel/bots/slack/__init__.py +2 -2
  40. camel/bots/slack/models.py +4 -4
  41. camel/bots/slack/slack_app.py +2 -2
  42. camel/bots/telegram_bot.py +2 -2
  43. camel/configs/__init__.py +26 -2
  44. camel/configs/aihubmix_config.py +90 -0
  45. camel/configs/aiml_config.py +2 -2
  46. camel/configs/amd_config.py +70 -0
  47. camel/configs/anthropic_config.py +8 -7
  48. camel/configs/base_config.py +2 -2
  49. camel/configs/bedrock_config.py +5 -3
  50. camel/configs/cerebras_config.py +98 -0
  51. camel/configs/cohere_config.py +3 -3
  52. camel/configs/cometapi_config.py +106 -0
  53. camel/configs/crynux_config.py +94 -0
  54. camel/configs/deepseek_config.py +9 -8
  55. camel/configs/gemini_config.py +6 -4
  56. camel/configs/groq_config.py +6 -4
  57. camel/configs/internlm_config.py +6 -4
  58. camel/configs/litellm_config.py +2 -2
  59. camel/configs/lmstudio_config.py +6 -4
  60. camel/configs/minimax_config.py +95 -0
  61. camel/configs/mistral_config.py +3 -3
  62. camel/configs/modelscope_config.py +5 -3
  63. camel/configs/moonshot_config.py +2 -2
  64. camel/configs/nebius_config.py +105 -0
  65. camel/configs/netmind_config.py +2 -2
  66. camel/configs/novita_config.py +2 -2
  67. camel/configs/nvidia_config.py +2 -2
  68. camel/configs/ollama_config.py +2 -2
  69. camel/configs/openai_config.py +8 -3
  70. camel/configs/openrouter_config.py +6 -4
  71. camel/configs/ppio_config.py +2 -2
  72. camel/configs/qianfan_config.py +85 -0
  73. camel/configs/qwen_config.py +2 -2
  74. camel/configs/reka_config.py +3 -3
  75. camel/configs/samba_config.py +8 -6
  76. camel/configs/sglang_config.py +2 -2
  77. camel/configs/siliconflow_config.py +2 -2
  78. camel/configs/togetherai_config.py +2 -2
  79. camel/configs/vllm_config.py +4 -2
  80. camel/configs/watsonx_config.py +2 -2
  81. camel/configs/yi_config.py +6 -4
  82. camel/configs/zhipuai_config.py +6 -4
  83. camel/{data_collector → data_collectors}/__init__.py +2 -2
  84. camel/{data_collector → data_collectors}/alpaca_collector.py +19 -10
  85. camel/{data_collector → data_collectors}/base.py +2 -2
  86. camel/{data_collector → data_collectors}/sharegpt_collector.py +3 -3
  87. camel/datagen/__init__.py +2 -2
  88. camel/datagen/cot_datagen.py +32 -37
  89. camel/datagen/evol_instruct/__init__.py +2 -2
  90. camel/datagen/evol_instruct/evol_instruct.py +2 -2
  91. camel/datagen/evol_instruct/scorer.py +24 -25
  92. camel/datagen/evol_instruct/templates.py +48 -48
  93. camel/datagen/self_improving_cot.py +5 -5
  94. camel/datagen/self_instruct/__init__.py +2 -2
  95. camel/datagen/self_instruct/filter/__init__.py +2 -2
  96. camel/datagen/self_instruct/filter/filter_function.py +2 -2
  97. camel/datagen/self_instruct/filter/filter_registry.py +2 -2
  98. camel/datagen/self_instruct/filter/instruction_filter.py +2 -2
  99. camel/datagen/self_instruct/self_instruct.py +2 -2
  100. camel/datagen/self_instruct/templates.py +47 -47
  101. camel/datagen/source2synth/__init__.py +2 -2
  102. camel/datagen/source2synth/data_processor.py +2 -2
  103. camel/datagen/source2synth/models.py +2 -2
  104. camel/datagen/source2synth/user_data_processor_config.py +2 -2
  105. camel/datahubs/__init__.py +2 -2
  106. camel/datahubs/base.py +2 -2
  107. camel/datahubs/huggingface.py +2 -2
  108. camel/datahubs/models.py +2 -2
  109. camel/datasets/__init__.py +2 -2
  110. camel/datasets/base_generator.py +41 -12
  111. camel/datasets/few_shot_generator.py +18 -18
  112. camel/datasets/models.py +3 -3
  113. camel/datasets/self_instruct_generator.py +2 -2
  114. camel/datasets/static_dataset.py +152 -2
  115. camel/embeddings/__init__.py +2 -2
  116. camel/embeddings/azure_embedding.py +2 -2
  117. camel/embeddings/base.py +2 -2
  118. camel/embeddings/gemini_embedding.py +2 -2
  119. camel/embeddings/jina_embedding.py +10 -3
  120. camel/embeddings/mistral_embedding.py +2 -2
  121. camel/embeddings/openai_compatible_embedding.py +2 -2
  122. camel/embeddings/openai_embedding.py +2 -2
  123. camel/embeddings/sentence_transformers_embeddings.py +4 -4
  124. camel/embeddings/together_embedding.py +2 -2
  125. camel/embeddings/vlm_embedding.py +11 -4
  126. camel/environments/__init__.py +14 -2
  127. camel/environments/models.py +2 -2
  128. camel/environments/multi_step.py +2 -2
  129. camel/environments/rlcards_env.py +860 -0
  130. camel/environments/single_step.py +30 -5
  131. camel/environments/tic_tac_toe.py +3 -3
  132. camel/extractors/__init__.py +2 -2
  133. camel/extractors/base.py +2 -2
  134. camel/extractors/python_strategies.py +2 -2
  135. camel/generators.py +2 -2
  136. camel/human.py +2 -2
  137. camel/interpreters/__init__.py +4 -2
  138. camel/interpreters/base.py +16 -3
  139. camel/interpreters/docker/Dockerfile +53 -7
  140. camel/interpreters/docker_interpreter.py +70 -11
  141. camel/interpreters/e2b_interpreter.py +59 -11
  142. camel/interpreters/internal_python_interpreter.py +81 -4
  143. camel/interpreters/interpreter_error.py +2 -2
  144. camel/interpreters/ipython_interpreter.py +23 -5
  145. camel/interpreters/microsandbox_interpreter.py +395 -0
  146. camel/interpreters/subprocess_interpreter.py +36 -4
  147. camel/loaders/__init__.py +17 -5
  148. camel/loaders/apify_reader.py +2 -2
  149. camel/loaders/base_io.py +2 -2
  150. camel/loaders/base_loader.py +85 -0
  151. camel/loaders/chunkr_reader.py +128 -93
  152. camel/loaders/crawl4ai_reader.py +2 -2
  153. camel/loaders/firecrawl_reader.py +6 -6
  154. camel/loaders/jina_url_reader.py +2 -2
  155. camel/loaders/markitdown.py +2 -2
  156. camel/loaders/mineru_extractor.py +2 -2
  157. camel/loaders/mistral_reader.py +148 -0
  158. camel/loaders/scrapegraph_reader.py +2 -2
  159. camel/loaders/unstructured_io.py +2 -2
  160. camel/logger.py +5 -5
  161. camel/memories/__init__.py +2 -2
  162. camel/memories/agent_memories.py +86 -3
  163. camel/memories/base.py +36 -2
  164. camel/memories/blocks/__init__.py +2 -2
  165. camel/memories/blocks/chat_history_block.py +126 -9
  166. camel/memories/blocks/vectordb_block.py +10 -3
  167. camel/memories/context_creators/__init__.py +2 -2
  168. camel/memories/context_creators/score_based.py +31 -239
  169. camel/memories/records.py +98 -13
  170. camel/messages/__init__.py +2 -2
  171. camel/messages/base.py +193 -46
  172. camel/messages/conversion/__init__.py +2 -2
  173. camel/messages/conversion/alpaca.py +2 -2
  174. camel/messages/conversion/conversation_models.py +2 -2
  175. camel/messages/conversion/sharegpt/__init__.py +2 -2
  176. camel/messages/conversion/sharegpt/function_call_formatter.py +2 -2
  177. camel/messages/conversion/sharegpt/hermes/__init__.py +2 -2
  178. camel/messages/conversion/sharegpt/hermes/hermes_function_formatter.py +2 -2
  179. camel/messages/func_message.py +54 -17
  180. camel/models/__init__.py +18 -2
  181. camel/models/_utils.py +3 -3
  182. camel/models/aihubmix_model.py +83 -0
  183. camel/models/aiml_model.py +11 -18
  184. camel/models/amd_model.py +101 -0
  185. camel/models/anthropic_model.py +127 -20
  186. camel/models/aws_bedrock_model.py +12 -35
  187. camel/models/azure_openai_model.py +263 -63
  188. camel/models/base_audio_model.py +5 -3
  189. camel/models/base_model.py +195 -26
  190. camel/models/cerebras_model.py +83 -0
  191. camel/models/cohere_model.py +81 -21
  192. camel/models/cometapi_model.py +83 -0
  193. camel/models/crynux_model.py +87 -0
  194. camel/models/deepseek_model.py +61 -59
  195. camel/models/fish_audio_model.py +8 -2
  196. camel/models/gemini_model.py +439 -30
  197. camel/models/groq_model.py +11 -19
  198. camel/models/internlm_model.py +11 -18
  199. camel/models/litellm_model.py +94 -34
  200. camel/models/lmstudio_model.py +17 -20
  201. camel/models/minimax_model.py +83 -0
  202. camel/models/mistral_model.py +84 -19
  203. camel/models/model_factory.py +49 -6
  204. camel/models/model_manager.py +33 -11
  205. camel/models/modelscope_model.py +13 -193
  206. camel/models/moonshot_model.py +195 -21
  207. camel/models/nebius_model.py +83 -0
  208. camel/models/nemotron_model.py +19 -9
  209. camel/models/netmind_model.py +11 -18
  210. camel/models/novita_model.py +11 -18
  211. camel/models/nvidia_model.py +11 -18
  212. camel/models/ollama_model.py +14 -21
  213. camel/models/openai_audio_models.py +2 -2
  214. camel/models/openai_compatible_model.py +234 -27
  215. camel/models/openai_model.py +255 -39
  216. camel/models/openrouter_model.py +11 -19
  217. camel/models/ppio_model.py +11 -18
  218. camel/models/qianfan_model.py +89 -0
  219. camel/models/qwen_model.py +13 -193
  220. camel/models/reka_model.py +90 -21
  221. camel/models/reward/__init__.py +2 -2
  222. camel/models/reward/base_reward_model.py +2 -2
  223. camel/models/reward/evaluator.py +2 -2
  224. camel/models/reward/nemotron_model.py +2 -2
  225. camel/models/reward/skywork_model.py +2 -2
  226. camel/models/samba_model.py +117 -49
  227. camel/models/sglang_model.py +162 -42
  228. camel/models/siliconflow_model.py +12 -35
  229. camel/models/stub_model.py +10 -7
  230. camel/models/togetherai_model.py +11 -18
  231. camel/models/vllm_model.py +10 -18
  232. camel/models/volcano_model.py +16 -20
  233. camel/models/watsonx_model.py +69 -19
  234. camel/models/yi_model.py +11 -18
  235. camel/models/zhipuai_model.py +70 -18
  236. camel/parsers/__init__.py +18 -0
  237. camel/parsers/mcp_tool_call_parser.py +176 -0
  238. camel/personas/__init__.py +2 -2
  239. camel/personas/persona.py +2 -2
  240. camel/personas/persona_hub.py +2 -2
  241. camel/prompts/__init__.py +2 -2
  242. camel/prompts/ai_society.py +2 -2
  243. camel/prompts/base.py +2 -2
  244. camel/prompts/code.py +2 -2
  245. camel/prompts/evaluation.py +2 -2
  246. camel/prompts/generate_text_embedding_data.py +2 -2
  247. camel/prompts/image_craft.py +2 -2
  248. camel/prompts/misalignment.py +2 -2
  249. camel/prompts/multi_condition_image_craft.py +2 -2
  250. camel/prompts/object_recognition.py +2 -2
  251. camel/prompts/persona_hub.py +3 -3
  252. camel/prompts/prompt_templates.py +2 -2
  253. camel/prompts/role_description_prompt_template.py +2 -2
  254. camel/prompts/solution_extraction.py +8 -8
  255. camel/prompts/task_prompt_template.py +2 -2
  256. camel/prompts/translation.py +2 -2
  257. camel/prompts/video_description_prompt.py +3 -3
  258. camel/responses/__init__.py +2 -2
  259. camel/responses/agent_responses.py +2 -2
  260. camel/retrievers/__init__.py +2 -2
  261. camel/retrievers/auto_retriever.py +23 -3
  262. camel/retrievers/base.py +2 -2
  263. camel/retrievers/bm25_retriever.py +3 -4
  264. camel/retrievers/cohere_rerank_retriever.py +2 -2
  265. camel/retrievers/hybrid_retrival.py +4 -4
  266. camel/retrievers/vector_retriever.py +2 -2
  267. camel/runtimes/Dockerfile.multi-toolkit +90 -0
  268. camel/{runtime → runtimes}/__init__.py +2 -2
  269. camel/runtimes/api.py +153 -0
  270. camel/{runtime → runtimes}/base.py +2 -2
  271. camel/{runtime → runtimes}/configs.py +13 -13
  272. camel/{runtime → runtimes}/daytona_runtime.py +18 -19
  273. camel/{runtime → runtimes}/docker_runtime.py +13 -13
  274. camel/{runtime → runtimes}/llm_guard_runtime.py +28 -28
  275. camel/{runtime → runtimes}/remote_http_runtime.py +12 -12
  276. camel/{runtime → runtimes}/ubuntu_docker_runtime.py +3 -3
  277. camel/{runtime → runtimes}/utils/__init__.py +2 -2
  278. camel/{runtime → runtimes}/utils/function_risk_toolkit.py +2 -2
  279. camel/{runtime → runtimes}/utils/ignore_risk_toolkit.py +2 -2
  280. camel/schemas/__init__.py +2 -2
  281. camel/schemas/base.py +2 -2
  282. camel/schemas/openai_converter.py +3 -3
  283. camel/schemas/outlines_converter.py +2 -2
  284. camel/services/agent_openapi_server.py +380 -0
  285. camel/societies/__init__.py +4 -2
  286. camel/societies/babyagi_playing.py +2 -2
  287. camel/societies/role_playing.py +201 -80
  288. camel/societies/workforce/__init__.py +10 -3
  289. camel/societies/workforce/base.py +9 -5
  290. camel/societies/workforce/events.py +143 -0
  291. camel/societies/workforce/prompts.py +258 -33
  292. camel/societies/workforce/role_playing_worker.py +95 -30
  293. camel/societies/workforce/single_agent_worker.py +659 -30
  294. camel/societies/workforce/structured_output_handler.py +512 -0
  295. camel/societies/workforce/task_channel.py +182 -38
  296. camel/societies/workforce/utils.py +784 -18
  297. camel/societies/workforce/worker.py +96 -28
  298. camel/societies/workforce/workflow_memory_manager.py +1746 -0
  299. camel/societies/workforce/workforce.py +5730 -366
  300. camel/societies/workforce/workforce_callback.py +103 -0
  301. camel/societies/workforce/workforce_logger.py +647 -0
  302. camel/societies/workforce/workforce_metrics.py +33 -0
  303. camel/storages/__init__.py +10 -2
  304. camel/storages/graph_storages/__init__.py +2 -2
  305. camel/storages/graph_storages/base.py +2 -2
  306. camel/storages/graph_storages/graph_element.py +2 -2
  307. camel/storages/graph_storages/nebula_graph.py +4 -4
  308. camel/storages/graph_storages/neo4j_graph.py +7 -7
  309. camel/storages/key_value_storages/__init__.py +2 -2
  310. camel/storages/key_value_storages/base.py +2 -2
  311. camel/storages/key_value_storages/in_memory.py +2 -2
  312. camel/storages/key_value_storages/json.py +17 -4
  313. camel/storages/key_value_storages/mem0_cloud.py +50 -49
  314. camel/storages/key_value_storages/redis.py +2 -2
  315. camel/storages/object_storages/__init__.py +2 -2
  316. camel/storages/object_storages/amazon_s3.py +2 -2
  317. camel/storages/object_storages/azure_blob.py +2 -2
  318. camel/storages/object_storages/base.py +2 -2
  319. camel/storages/object_storages/google_cloud.py +3 -3
  320. camel/storages/vectordb_storages/__init__.py +12 -2
  321. camel/storages/vectordb_storages/base.py +2 -2
  322. camel/storages/vectordb_storages/chroma.py +731 -0
  323. camel/storages/vectordb_storages/faiss.py +712 -0
  324. camel/storages/vectordb_storages/milvus.py +2 -2
  325. camel/storages/vectordb_storages/oceanbase.py +16 -17
  326. camel/storages/vectordb_storages/pgvector.py +349 -0
  327. camel/storages/vectordb_storages/qdrant.py +6 -6
  328. camel/storages/vectordb_storages/surreal.py +372 -0
  329. camel/storages/vectordb_storages/tidb.py +11 -8
  330. camel/storages/vectordb_storages/weaviate.py +714 -0
  331. camel/tasks/__init__.py +2 -2
  332. camel/tasks/task.py +366 -27
  333. camel/tasks/task_prompt.py +3 -3
  334. camel/terminators/__init__.py +2 -2
  335. camel/terminators/base.py +2 -2
  336. camel/terminators/response_terminator.py +2 -2
  337. camel/terminators/token_limit_terminator.py +2 -2
  338. camel/toolkits/__init__.py +58 -10
  339. camel/toolkits/aci_toolkit.py +66 -21
  340. camel/toolkits/arxiv_toolkit.py +8 -8
  341. camel/toolkits/ask_news_toolkit.py +2 -2
  342. camel/toolkits/async_browser_toolkit.py +174 -575
  343. camel/toolkits/audio_analysis_toolkit.py +3 -3
  344. camel/toolkits/base.py +65 -7
  345. camel/toolkits/bohrium_toolkit.py +318 -0
  346. camel/toolkits/browser_toolkit.py +306 -566
  347. camel/toolkits/browser_toolkit_commons.py +568 -0
  348. camel/toolkits/code_execution.py +67 -11
  349. camel/toolkits/context_summarizer_toolkit.py +684 -0
  350. camel/toolkits/craw4ai_toolkit.py +93 -0
  351. camel/toolkits/dappier_toolkit.py +12 -8
  352. camel/toolkits/data_commons_toolkit.py +2 -2
  353. camel/toolkits/dingtalk.py +1135 -0
  354. camel/toolkits/earth_science_toolkit.py +5367 -0
  355. camel/toolkits/edgeone_pages_mcp_toolkit.py +49 -0
  356. camel/toolkits/excel_toolkit.py +910 -70
  357. camel/toolkits/file_toolkit.py +1402 -0
  358. camel/toolkits/function_tool.py +128 -20
  359. camel/toolkits/github_toolkit.py +148 -43
  360. camel/toolkits/gmail_toolkit.py +1839 -0
  361. camel/toolkits/google_calendar_toolkit.py +40 -6
  362. camel/toolkits/google_drive_mcp_toolkit.py +54 -0
  363. camel/toolkits/google_maps_toolkit.py +2 -2
  364. camel/toolkits/google_scholar_toolkit.py +2 -2
  365. camel/toolkits/human_toolkit.py +36 -12
  366. camel/toolkits/hybrid_browser_toolkit/__init__.py +18 -0
  367. camel/toolkits/hybrid_browser_toolkit/config_loader.py +185 -0
  368. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +246 -0
  369. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +1973 -0
  370. camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
  371. camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +4589 -0
  372. camel/toolkits/hybrid_browser_toolkit/ts/package.json +33 -0
  373. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-scripts.js +125 -0
  374. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +1929 -0
  375. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +233 -0
  376. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +589 -0
  377. camel/toolkits/hybrid_browser_toolkit/ts/src/index.ts +7 -0
  378. camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
  379. camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
  380. camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
  381. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +129 -0
  382. camel/toolkits/hybrid_browser_toolkit/ts/tsconfig.json +27 -0
  383. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +319 -0
  384. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +1037 -0
  385. camel/toolkits/hybrid_browser_toolkit_py/__init__.py +17 -0
  386. camel/toolkits/hybrid_browser_toolkit_py/actions.py +575 -0
  387. camel/toolkits/hybrid_browser_toolkit_py/agent.py +311 -0
  388. camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +787 -0
  389. camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +490 -0
  390. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +2390 -0
  391. camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +233 -0
  392. camel/toolkits/hybrid_browser_toolkit_py/stealth_script.js +0 -0
  393. camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +1043 -0
  394. camel/toolkits/image_analysis_toolkit.py +3 -3
  395. camel/toolkits/image_generation_toolkit.py +390 -0
  396. camel/toolkits/jina_reranker_toolkit.py +195 -79
  397. camel/toolkits/klavis_toolkit.py +7 -3
  398. camel/toolkits/linkedin_toolkit.py +2 -2
  399. camel/toolkits/markitdown_toolkit.py +104 -0
  400. camel/toolkits/math_toolkit.py +66 -12
  401. camel/toolkits/mcp_toolkit.py +841 -600
  402. camel/toolkits/memory_toolkit.py +7 -3
  403. camel/toolkits/meshy_toolkit.py +2 -2
  404. camel/toolkits/message_agent_toolkit.py +608 -0
  405. camel/toolkits/message_integration.py +724 -0
  406. camel/toolkits/mineru_toolkit.py +2 -2
  407. camel/toolkits/minimax_mcp_toolkit.py +195 -0
  408. camel/toolkits/networkx_toolkit.py +2 -2
  409. camel/toolkits/note_taking_toolkit.py +277 -0
  410. camel/toolkits/notion_mcp_toolkit.py +224 -0
  411. camel/toolkits/notion_toolkit.py +2 -2
  412. camel/toolkits/open_api_specs/biztoc/__init__.py +2 -2
  413. camel/toolkits/open_api_specs/biztoc/ai-plugin.json +1 -1
  414. camel/toolkits/open_api_specs/coursera/__init__.py +2 -2
  415. camel/toolkits/open_api_specs/create_qr_code/__init__.py +2 -2
  416. camel/toolkits/open_api_specs/klarna/__init__.py +2 -2
  417. camel/toolkits/open_api_specs/nasa_apod/__init__.py +2 -2
  418. camel/toolkits/open_api_specs/outschool/__init__.py +2 -2
  419. camel/toolkits/open_api_specs/outschool/ai-plugin.json +1 -1
  420. camel/toolkits/open_api_specs/outschool/openapi.yaml +1 -1
  421. camel/toolkits/open_api_specs/outschool/paths/__init__.py +2 -2
  422. camel/toolkits/open_api_specs/outschool/paths/get_classes.py +2 -2
  423. camel/toolkits/open_api_specs/outschool/paths/search_teachers.py +2 -2
  424. camel/toolkits/open_api_specs/security_config.py +2 -2
  425. camel/toolkits/open_api_specs/speak/__init__.py +2 -2
  426. camel/toolkits/open_api_specs/web_scraper/__init__.py +2 -2
  427. camel/toolkits/open_api_specs/web_scraper/ai-plugin.json +1 -1
  428. camel/toolkits/open_api_specs/web_scraper/paths/__init__.py +2 -2
  429. camel/toolkits/open_api_specs/web_scraper/paths/scraper.py +2 -2
  430. camel/toolkits/open_api_toolkit.py +2 -2
  431. camel/toolkits/openbb_toolkit.py +7 -3
  432. camel/toolkits/origene_mcp_toolkit.py +56 -0
  433. camel/toolkits/page_script.js +86 -74
  434. camel/toolkits/playwright_mcp_toolkit.py +27 -32
  435. camel/toolkits/pptx_toolkit.py +790 -0
  436. camel/toolkits/pubmed_toolkit.py +2 -2
  437. camel/toolkits/pulse_mcp_search_toolkit.py +2 -2
  438. camel/toolkits/pyautogui_toolkit.py +2 -2
  439. camel/toolkits/reddit_toolkit.py +2 -2
  440. camel/toolkits/resend_toolkit.py +168 -0
  441. camel/toolkits/retrieval_toolkit.py +2 -2
  442. camel/toolkits/screenshot_toolkit.py +213 -0
  443. camel/toolkits/search_toolkit.py +539 -146
  444. camel/toolkits/searxng_toolkit.py +2 -2
  445. camel/toolkits/semantic_scholar_toolkit.py +2 -2
  446. camel/toolkits/slack_toolkit.py +108 -58
  447. camel/toolkits/sql_toolkit.py +712 -0
  448. camel/toolkits/stripe_toolkit.py +2 -2
  449. camel/toolkits/sympy_toolkit.py +3 -3
  450. camel/toolkits/task_planning_toolkit.py +134 -0
  451. camel/toolkits/terminal_toolkit/__init__.py +18 -0
  452. camel/toolkits/terminal_toolkit/terminal_toolkit.py +1070 -0
  453. camel/toolkits/terminal_toolkit/utils.py +532 -0
  454. camel/toolkits/thinking_toolkit.py +3 -3
  455. camel/toolkits/twitter_toolkit.py +8 -3
  456. camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
  457. camel/toolkits/video_analysis_toolkit.py +112 -29
  458. camel/toolkits/video_download_toolkit.py +22 -16
  459. camel/toolkits/weather_toolkit.py +2 -2
  460. camel/toolkits/web_deploy_toolkit.py +1219 -0
  461. camel/toolkits/wechat_official_toolkit.py +483 -0
  462. camel/toolkits/whatsapp_toolkit.py +2 -2
  463. camel/toolkits/wolfram_alpha_toolkit.py +53 -25
  464. camel/toolkits/zapier_toolkit.py +7 -3
  465. camel/types/__init__.py +4 -4
  466. camel/types/agents/__init__.py +2 -2
  467. camel/types/agents/tool_calling_record.py +6 -3
  468. camel/types/enums.py +454 -35
  469. camel/types/mcp_registries.py +2 -2
  470. camel/types/openai_types.py +4 -4
  471. camel/types/unified_model_type.py +43 -6
  472. camel/utils/__init__.py +20 -2
  473. camel/utils/async_func.py +2 -2
  474. camel/utils/chunker/__init__.py +2 -2
  475. camel/utils/chunker/base.py +2 -2
  476. camel/utils/chunker/code_chunker.py +2 -2
  477. camel/utils/chunker/uio_chunker.py +2 -2
  478. camel/utils/commons.py +65 -7
  479. camel/utils/constants.py +5 -2
  480. camel/utils/context_utils.py +1134 -0
  481. camel/utils/deduplication.py +2 -2
  482. camel/utils/filename.py +2 -2
  483. camel/utils/langfuse.py +258 -0
  484. camel/utils/mcp.py +140 -6
  485. camel/utils/mcp_client.py +1056 -0
  486. camel/utils/message_summarizer.py +148 -0
  487. camel/utils/response_format.py +2 -2
  488. camel/utils/token_counting.py +45 -22
  489. camel/utils/tool_result.py +44 -0
  490. camel/verifiers/__init__.py +2 -2
  491. camel/verifiers/base.py +2 -2
  492. camel/verifiers/math_verifier.py +2 -2
  493. camel/verifiers/models.py +2 -2
  494. camel/verifiers/physics_verifier.py +2 -2
  495. camel/verifiers/python_verifier.py +2 -2
  496. {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/METADATA +349 -108
  497. camel_ai-0.2.82.dist-info/RECORD +507 -0
  498. {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/WHEEL +1 -1
  499. {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/licenses/LICENSE +1 -1
  500. camel/loaders/pandas_reader.py +0 -368
  501. camel/runtime/api.py +0 -97
  502. camel/toolkits/dalle_toolkit.py +0 -171
  503. camel/toolkits/file_write_toolkit.py +0 -395
  504. camel/toolkits/openai_agent_toolkit.py +0 -135
  505. camel/toolkits/terminal_toolkit.py +0 -1037
  506. camel_ai-0.2.59.dist-info/RECORD +0 -410
@@ -1,4 +1,4 @@
1
- # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
1
+ # ========= Copyright 2023-2025 @ CAMEL-AI.org. All Rights Reserved. =========
2
2
  # Licensed under the Apache License, Version 2.0 (the "License");
3
3
  # you may not use this file except in compliance with the License.
4
4
  # You may obtain a copy of the License at
@@ -10,13 +10,14 @@
10
10
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
11
  # See the License for the specific language governing permissions and
12
12
  # limitations under the License.
13
- # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
13
+ # ========= Copyright 2023-2025 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ # Enables postponed evaluation of annotations (for string-based type hints)
16
+ from __future__ import annotations
14
17
 
15
18
  import datetime
16
19
  import io
17
- import json
18
20
  import os
19
- import random
20
21
  import re
21
22
  import shutil
22
23
  import time
@@ -25,21 +26,17 @@ from copy import deepcopy
25
26
  from typing import (
26
27
  TYPE_CHECKING,
27
28
  Any,
28
- BinaryIO,
29
29
  Dict,
30
30
  List,
31
31
  Literal,
32
32
  Optional,
33
33
  Tuple,
34
- TypedDict,
35
34
  Union,
36
35
  cast,
37
36
  )
38
37
 
39
- from PIL import Image, ImageDraw, ImageFont
38
+ from PIL import Image
40
39
 
41
- if TYPE_CHECKING:
42
- from camel.agents import ChatAgent
43
40
  from camel.logger import get_logger
44
41
  from camel.messages import BaseMessage
45
42
  from camel.models import BaseModelBackend, ModelFactory
@@ -53,85 +50,39 @@ from camel.utils import (
53
50
  sanitize_filename,
54
51
  )
55
52
 
56
- logger = get_logger(__name__)
53
+ # Import shared components from browser_toolkit_commons
54
+ from .browser_toolkit_commons import (
55
+ ACTION_WITH_FEEDBACK_LIST,
56
+ AVAILABLE_ACTIONS_PROMPT,
57
+ GET_FINAL_ANSWER_PROMPT_TEMPLATE,
58
+ OBSERVE_PROMPT_TEMPLATE,
59
+ PLANNING_AGENT_SYSTEM_PROMPT,
60
+ TASK_PLANNING_PROMPT_TEMPLATE,
61
+ TASK_REPLANNING_PROMPT_TEMPLATE,
62
+ WEB_AGENT_SYSTEM_PROMPT,
63
+ InteractiveRegion,
64
+ VisualViewport,
65
+ _add_set_of_mark,
66
+ _parse_json_output,
67
+ _reload_image,
68
+ interactive_region_from_dict,
69
+ visual_viewport_from_dict,
70
+ )
57
71
 
58
- TOP_NO_LABEL_ZONE = 20
72
+ if TYPE_CHECKING:
73
+ from playwright.sync_api import (
74
+ Browser,
75
+ BrowserContext,
76
+ FloatRect,
77
+ Page,
78
+ Playwright,
79
+ )
80
+
81
+ from camel.agents import ChatAgent
59
82
 
83
+ logger = get_logger(__name__)
60
84
 
61
- AVAILABLE_ACTIONS_PROMPT = """
62
- 1. `fill_input_id(identifier: Union[str, int], text: str)`: Fill an input
63
- field (e.g. search box) with the given text and press Enter.
64
- 2. `click_id(identifier: Union[str, int])`: Click an element with the given ID.
65
- 3. `hover_id(identifier: Union[str, int])`: Hover over an element with the
66
- given ID.
67
- 4. `download_file_id(identifier: Union[str, int])`: Download a file with the
68
- given ID. It returns the path to the downloaded file. If the file is
69
- successfully downloaded, you can stop the simulation and report the path to
70
- the downloaded file for further processing.
71
- 5. `scroll_to_bottom()`: Scroll to the bottom of the page.
72
- 6. `scroll_to_top()`: Scroll to the top of the page.
73
- 7. `scroll_up()`: Scroll up the page. It is suitable when you want to see the
74
- elements above the current viewport.
75
- 8. `scroll_down()`: Scroll down the page. It is suitable when you want to see
76
- the elements below the current viewport. If the webpage does not change, It
77
- means that the webpage has scrolled to the bottom.
78
- 9. `back()`: Navigate back to the previous page. This is useful when you want
79
- to go back to the previous page, as current page is not useful.
80
- 10. `stop()`: Stop the action process, because the task is completed or failed
81
- (impossible to find the answer). In this situation, you should provide your
82
- answer in your output.
83
- 11. `get_url()`: Get the current URL of the current page.
84
- 12. `find_text_on_page(search_text: str)`: Find the next given text on the
85
- current whole page, and scroll the page to the targeted text. It is equivalent
86
- to pressing Ctrl + F and searching for the text, and is powerful when you want
87
- to fast-check whether the current page contains some specific text.
88
- 13. `visit_page(url: str)`: Go to the specific url page.
89
- 14. `click_blank_area()`: Click a blank area of the page to unfocus the
90
- current element. It is useful when you have clicked an element but it cannot
91
- unfocus itself (e.g. Menu bar) to automatically render the updated webpage.
92
- 15. `ask_question_about_video(question: str)`: Ask a question about the
93
- current webpage which contains video, e.g. youtube websites.
94
- """
95
-
96
- ACTION_WITH_FEEDBACK_LIST = [
97
- 'ask_question_about_video',
98
- 'download_file_id',
99
- 'find_text_on_page',
100
- ]
101
-
102
-
103
- # Code from magentic-one
104
- class DOMRectangle(TypedDict):
105
- x: Union[int, float]
106
- y: Union[int, float]
107
- width: Union[int, float]
108
- height: Union[int, float]
109
- top: Union[int, float]
110
- right: Union[int, float]
111
- bottom: Union[int, float]
112
- left: Union[int, float]
113
-
114
-
115
- class VisualViewport(TypedDict):
116
- height: Union[int, float]
117
- width: Union[int, float]
118
- offsetLeft: Union[int, float]
119
- offsetTop: Union[int, float]
120
- pageLeft: Union[int, float]
121
- pageTop: Union[int, float]
122
- scale: Union[int, float]
123
- clientWidth: Union[int, float]
124
- clientHeight: Union[int, float]
125
- scrollWidth: Union[int, float]
126
- scrollHeight: Union[int, float]
127
-
128
-
129
- class InteractiveRegion(TypedDict):
130
- tag_name: str
131
- role: str
132
- aria_name: str
133
- v_scrollable: bool
134
- rects: List[DOMRectangle]
85
+ TOP_NO_LABEL_ZONE = 20
135
86
 
136
87
 
137
88
  def _get_str(d: Any, k: str) -> str:
@@ -167,270 +118,6 @@ def _get_bool(d: Any, k: str) -> bool:
167
118
  )
168
119
 
169
120
 
170
- def _parse_json_output(text: str) -> Dict[str, Any]:
171
- r"""Extract JSON output from a string."""
172
-
173
- markdown_pattern = r'```(?:json)?\s*(.*?)\s*```'
174
- markdown_match = re.search(markdown_pattern, text, re.DOTALL)
175
- if markdown_match:
176
- text = markdown_match.group(1).strip()
177
-
178
- triple_quotes_pattern = r'"""(?:json)?\s*(.*?)\s*"""'
179
- triple_quotes_match = re.search(triple_quotes_pattern, text, re.DOTALL)
180
- if triple_quotes_match:
181
- text = triple_quotes_match.group(1).strip()
182
-
183
- try:
184
- return json.loads(text)
185
- except json.JSONDecodeError:
186
- try:
187
- fixed_text = re.sub(
188
- r'`([^`]*?)`(?=\s*[:,\[\]{}]|$)', r'"\1"', text
189
- )
190
- return json.loads(fixed_text)
191
- except json.JSONDecodeError:
192
- result = {}
193
- try:
194
- bool_pattern = r'"(\w+)"\s*:\s*(true|false)'
195
- for match in re.finditer(bool_pattern, text, re.IGNORECASE):
196
- key, value = match.groups()
197
- result[key] = value.lower() == "true"
198
-
199
- str_pattern = r'"(\w+)"\s*:\s*"([^"]*)"'
200
- for match in re.finditer(str_pattern, text):
201
- key, value = match.groups()
202
- result[key] = value
203
-
204
- num_pattern = r'"(\w+)"\s*:\s*(-?\d+(?:\.\d+)?)'
205
- for match in re.finditer(num_pattern, text):
206
- key, value = match.groups()
207
- try:
208
- result[key] = int(value)
209
- except ValueError:
210
- result[key] = float(value)
211
-
212
- empty_str_pattern = r'"(\w+)"\s*:\s*""'
213
- for match in re.finditer(empty_str_pattern, text):
214
- key = match.group(1)
215
- result[key] = ""
216
-
217
- if result:
218
- return result
219
-
220
- logger.warning(f"Failed to parse JSON output: {text}")
221
- return {}
222
- except Exception as e:
223
- logger.warning(f"Error while extracting fields from JSON: {e}")
224
- return {}
225
-
226
-
227
- def _reload_image(image: Image.Image) -> Image.Image:
228
- buffer = io.BytesIO()
229
- image.save(buffer, format="PNG")
230
- buffer.seek(0)
231
- return Image.open(buffer)
232
-
233
-
234
- def dom_rectangle_from_dict(rect: Dict[str, Any]) -> DOMRectangle:
235
- r"""Create a DOMRectangle object from a dictionary."""
236
- return DOMRectangle(
237
- x=_get_number(rect, "x"),
238
- y=_get_number(rect, "y"),
239
- width=_get_number(rect, "width"),
240
- height=_get_number(rect, "height"),
241
- top=_get_number(rect, "top"),
242
- right=_get_number(rect, "right"),
243
- bottom=_get_number(rect, "bottom"),
244
- left=_get_number(rect, "left"),
245
- )
246
-
247
-
248
- def interactive_region_from_dict(region: Dict[str, Any]) -> InteractiveRegion:
249
- r"""Create an :class:`InteractiveRegion` object from a dictionary."""
250
- typed_rects: List[DOMRectangle] = []
251
- for rect in region["rects"]:
252
- typed_rects.append(dom_rectangle_from_dict(rect))
253
-
254
- return InteractiveRegion(
255
- tag_name=_get_str(region, "tag_name"),
256
- role=_get_str(region, "role"),
257
- aria_name=_get_str(region, "aria-name"),
258
- v_scrollable=_get_bool(region, "v-scrollable"),
259
- rects=typed_rects,
260
- )
261
-
262
-
263
- def visual_viewport_from_dict(viewport: Dict[str, Any]) -> VisualViewport:
264
- r"""Create a :class:`VisualViewport` object from a dictionary."""
265
- return VisualViewport(
266
- height=_get_number(viewport, "height"),
267
- width=_get_number(viewport, "width"),
268
- offsetLeft=_get_number(viewport, "offsetLeft"),
269
- offsetTop=_get_number(viewport, "offsetTop"),
270
- pageLeft=_get_number(viewport, "pageLeft"),
271
- pageTop=_get_number(viewport, "pageTop"),
272
- scale=_get_number(viewport, "scale"),
273
- clientWidth=_get_number(viewport, "clientWidth"),
274
- clientHeight=_get_number(viewport, "clientHeight"),
275
- scrollWidth=_get_number(viewport, "scrollWidth"),
276
- scrollHeight=_get_number(viewport, "scrollHeight"),
277
- )
278
-
279
-
280
- def add_set_of_mark(
281
- screenshot: Union[bytes, Image.Image, io.BufferedIOBase],
282
- ROIs: Dict[str, InteractiveRegion],
283
- ) -> Tuple[Image.Image, List[str], List[str], List[str]]:
284
- if isinstance(screenshot, Image.Image):
285
- return _add_set_of_mark(screenshot, ROIs)
286
-
287
- if isinstance(screenshot, bytes):
288
- screenshot = io.BytesIO(screenshot)
289
-
290
- image = Image.open(cast(BinaryIO, screenshot))
291
- comp, visible_rects, rects_above, rects_below = _add_set_of_mark(
292
- image, ROIs
293
- )
294
- image.close()
295
- return comp, visible_rects, rects_above, rects_below
296
-
297
-
298
- def _add_set_of_mark(
299
- screenshot: Image.Image, ROIs: Dict[str, InteractiveRegion]
300
- ) -> Tuple[Image.Image, List[str], List[str], List[str]]:
301
- r"""Add a set of marks to the screenshot.
302
-
303
- Args:
304
- screenshot (Image.Image): The screenshot to add marks to.
305
- ROIs (Dict[str, InteractiveRegion]): The regions to add marks to.
306
-
307
- Returns:
308
- Tuple[Image.Image, List[str], List[str], List[str]]: A tuple
309
- containing the screenshot with marked ROIs, ROIs fully within the
310
- images, ROIs located above the visible area, and ROIs located below
311
- the visible area.
312
- """
313
- visible_rects: List[str] = list()
314
- rects_above: List[str] = list() # Scroll up to see
315
- rects_below: List[str] = list() # Scroll down to see
316
-
317
- fnt = ImageFont.load_default(14)
318
- base = screenshot.convert("L").convert("RGBA")
319
- overlay = Image.new("RGBA", base.size)
320
-
321
- draw = ImageDraw.Draw(overlay)
322
- for r in ROIs:
323
- for rect in ROIs[r]["rects"]:
324
- # Empty rectangles
325
- if not rect or rect["width"] == 0 or rect["height"] == 0:
326
- continue
327
-
328
- # TODO: add scroll left and right?
329
- horizontal_center = (rect["right"] + rect["left"]) / 2.0
330
- vertical_center = (rect["top"] + rect["bottom"]) / 2.0
331
- is_within_horizon = 0 <= horizontal_center < base.size[0]
332
- is_above_viewport = vertical_center < 0
333
- is_below_viewport = vertical_center >= base.size[1]
334
-
335
- if is_within_horizon:
336
- if is_above_viewport:
337
- rects_above.append(r)
338
- elif is_below_viewport:
339
- rects_below.append(r)
340
- else: # Fully visible
341
- visible_rects.append(r)
342
- _draw_roi(draw, int(r), fnt, rect)
343
-
344
- comp = Image.alpha_composite(base, overlay)
345
- overlay.close()
346
- return comp, visible_rects, rects_above, rects_below
347
-
348
-
349
- def _draw_roi(
350
- draw: ImageDraw.ImageDraw,
351
- idx: int,
352
- font: ImageFont.FreeTypeFont | ImageFont.ImageFont,
353
- rect: DOMRectangle,
354
- ) -> None:
355
- r"""Draw a ROI on the image.
356
-
357
- Args:
358
- draw (ImageDraw.ImageDraw): The draw object.
359
- idx (int): The index of the ROI.
360
- font (ImageFont.FreeTypeFont | ImageFont.ImageFont): The font.
361
- rect (DOMRectangle): The DOM rectangle.
362
- """
363
- color = _get_random_color(idx)
364
- text_color = _get_text_color(color)
365
-
366
- roi = ((rect["left"], rect["top"]), (rect["right"], rect["bottom"]))
367
-
368
- label_location = (rect["right"], rect["top"])
369
- label_anchor = "rb"
370
-
371
- if label_location[1] <= TOP_NO_LABEL_ZONE:
372
- label_location = (rect["right"], rect["bottom"])
373
- label_anchor = "rt"
374
-
375
- draw.rectangle(
376
- roi, outline=color, fill=(color[0], color[1], color[2], 48), width=2
377
- )
378
-
379
- bbox = draw.textbbox(
380
- label_location,
381
- str(idx),
382
- font=font,
383
- anchor=label_anchor,
384
- align="center",
385
- )
386
- bbox = (bbox[0] - 3, bbox[1] - 3, bbox[2] + 3, bbox[3] + 3)
387
- draw.rectangle(bbox, fill=color)
388
-
389
- draw.text(
390
- label_location,
391
- str(idx),
392
- fill=text_color,
393
- font=font,
394
- anchor=label_anchor,
395
- align="center",
396
- )
397
-
398
-
399
- def _get_text_color(
400
- bg_color: Tuple[int, int, int, int],
401
- ) -> Tuple[int, int, int, int]:
402
- r"""Determine the ideal text color (black or white) for contrast.
403
-
404
- Args:
405
- bg_color: The background color (R, G, B, A).
406
-
407
- Returns:
408
- A tuple representing black or white color for text.
409
- """
410
- luminance = bg_color[0] * 0.3 + bg_color[1] * 0.59 + bg_color[2] * 0.11
411
- return (0, 0, 0, 255) if luminance > 120 else (255, 255, 255, 255)
412
-
413
-
414
- def _get_random_color(identifier: int) -> Tuple[int, int, int, int]:
415
- r"""Generate a consistent random RGBA color based on the identifier.
416
-
417
- Args:
418
- identifier: The ID used as a seed to ensure color consistency.
419
-
420
- Returns:
421
- A tuple representing (R, G, B, A) values.
422
- """
423
- rnd = random.Random(int(identifier))
424
- r = rnd.randint(0, 255)
425
- g = rnd.randint(125, 255)
426
- b = rnd.randint(0, 50)
427
- color = [r, g, b]
428
- # TODO: check why shuffle is needed?
429
- rnd.shuffle(color)
430
- color.append(255)
431
- return cast(Tuple[int, int, int, int], tuple(color))
432
-
433
-
434
121
  class BaseBrowser:
435
122
  def __init__(
436
123
  self,
@@ -438,6 +125,7 @@ class BaseBrowser:
438
125
  cache_dir: Optional[str] = None,
439
126
  channel: Literal["chrome", "msedge", "chromium"] = "chromium",
440
127
  cookie_json_path: Optional[str] = None,
128
+ user_data_dir: Optional[str] = None,
441
129
  ):
442
130
  r"""Initialize the WebBrowser instance.
443
131
 
@@ -449,28 +137,36 @@ class BaseBrowser:
449
137
  "chromium".
450
138
  cookie_json_path (Optional[str]): Path to a JSON file containing
451
139
  authentication cookies and browser storage state. If provided
452
- and the file exists, the browser will load this state to maintain
453
- authenticated sessions without requiring manual login.
140
+ and the file exists, the browser will load this state to
141
+ maintain authenticated sessions. This is primarily used when
142
+ `user_data_dir` is not set.
143
+ user_data_dir (Optional[str]): The directory to store user data
144
+ for persistent context. If None, a fresh browser instance
145
+ is used without saving data. (default: :obj:`None`)
454
146
 
455
147
  Returns:
456
148
  None
457
149
  """
458
- from playwright.sync_api import (
459
- sync_playwright,
460
- )
461
-
462
- self.history: list = []
150
+ self.history: List[Any] = []
463
151
  self.headless = headless
464
152
  self.channel = channel
465
153
  self._ensure_browser_installed()
466
- self.playwright = sync_playwright().start()
467
- self.page_history: list = [] # stores the history of visited pages
154
+ # lazy initialization - playwright is started in init() method
155
+ self.playwright: Optional[Playwright] = None
156
+ self.page_history: List[
157
+ str
158
+ ] = [] # stores the history of visited pages
468
159
  self.cookie_json_path = cookie_json_path
160
+ self.user_data_dir = user_data_dir
469
161
 
470
162
  # Set the cache directory
471
163
  self.cache_dir = "tmp/" if cache_dir is None else cache_dir
472
164
  os.makedirs(self.cache_dir, exist_ok=True)
473
165
 
166
+ # Create user data directory only if specified
167
+ if self.user_data_dir:
168
+ os.makedirs(self.user_data_dir, exist_ok=True)
169
+
474
170
  # Load the page script
475
171
  abs_dir_path = os.path.dirname(os.path.abspath(__file__))
476
172
  page_script_path = os.path.join(abs_dir_path, "page_script.js")
@@ -483,27 +179,70 @@ class BaseBrowser:
483
179
  raise FileNotFoundError(
484
180
  f"Page script file not found at path: {page_script_path}"
485
181
  )
182
+ self.browser: Optional[Browser] = None
183
+ self.context: Optional[BrowserContext] = None
184
+ self.page: Optional[Page] = None
185
+ self.page_url: Optional[str] = None
186
+ self.web_agent_model: Optional[BaseModelBackend] = (
187
+ None # Added for type hinting
188
+ )
486
189
 
487
190
  def init(self) -> None:
488
191
  r"""Initialize the browser."""
489
- # Launch the browser, if headless is False, the browser will display
490
- self.browser = self.playwright.chromium.launch(
491
- headless=self.headless, channel=self.channel
192
+ # lazy start playwright when init() is called, not in __init__
193
+ if self.playwright is None:
194
+ from playwright.sync_api import sync_playwright
195
+
196
+ self.playwright = sync_playwright().start()
197
+
198
+ browser_launch_args = [
199
+ "--disable-blink-features=AutomationControlled", # Basic stealth
200
+ ]
201
+
202
+ user_agent_string = (
203
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
204
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
205
+ "Chrome/91.0.4472.124 Safari/537.36"
492
206
  )
493
207
 
494
- # Check if cookie file exists before using it to maintain
495
- # authenticated sessions. This prevents errors when the cookie file
496
- # doesn't exist
497
- if self.cookie_json_path and os.path.exists(self.cookie_json_path):
498
- self.context = self.browser.new_context(
499
- accept_downloads=True, storage_state=self.cookie_json_path
208
+ if self.user_data_dir:
209
+ self.context = self.playwright.chromium.launch_persistent_context(
210
+ user_data_dir=self.user_data_dir,
211
+ headless=self.headless,
212
+ channel=self.channel,
213
+ accept_downloads=True,
214
+ user_agent=user_agent_string,
215
+ java_script_enabled=True,
216
+ args=browser_launch_args,
500
217
  )
218
+ self.browser = None # Not using a separate browser instance
219
+ if (
220
+ len(self.context.pages) > 0
221
+ ): # Persistent context might reopen pages
222
+ self.page = self.context.pages[0]
223
+ else:
224
+ self.page = self.context.new_page()
501
225
  else:
502
- self.context = self.browser.new_context(
503
- accept_downloads=True,
226
+ # Launch a fresh browser instance
227
+ self.browser = self.playwright.chromium.launch(
228
+ headless=self.headless,
229
+ channel=self.channel,
230
+ args=browser_launch_args,
504
231
  )
505
- # Create a new page
506
- self.page = self.context.new_page()
232
+
233
+ new_context_kwargs: Dict[str, Any] = {
234
+ "accept_downloads": True,
235
+ "user_agent": user_agent_string,
236
+ "java_script_enabled": True,
237
+ }
238
+ if self.cookie_json_path and os.path.exists(self.cookie_json_path):
239
+ new_context_kwargs["storage_state"] = self.cookie_json_path
240
+
241
+ self.context = self.browser.new_context(**new_context_kwargs)
242
+ self.page = self.context.new_page()
243
+
244
+ assert self.context is not None
245
+ assert self.page is not None
507
246
 
508
247
  def clean_cache(self) -> None:
509
248
  r"""Delete the cache directory and its contents."""
@@ -513,7 +252,7 @@ class BaseBrowser:
513
252
  def _wait_for_load(self, timeout: int = 20) -> None:
514
253
  r"""Wait for a certain amount of time for the page to load."""
515
254
  timeout_ms = timeout * 1000
516
-
255
+ assert self.page is not None
517
256
  self.page.wait_for_load_state("load", timeout=timeout_ms)
518
257
 
519
258
  # TODO: check if this is needed
@@ -521,13 +260,14 @@ class BaseBrowser:
521
260
 
522
261
  def click_blank_area(self) -> None:
523
262
  r"""Click a blank area of the page to unfocus the current element."""
263
+ assert self.page is not None
524
264
  self.page.mouse.click(0, 0)
525
265
  self._wait_for_load()
526
266
 
527
267
  @retry_on_error()
528
268
  def visit_page(self, url: str) -> None:
529
269
  r"""Visit a page with the given URL."""
530
-
270
+ assert self.page is not None
531
271
  self.page.goto(url)
532
272
  self._wait_for_load()
533
273
  self.page_url = url
@@ -544,7 +284,8 @@ class BaseBrowser:
544
284
  """
545
285
  current_url = self.get_url()
546
286
 
547
- # Confirm with user before proceeding due to potential slow processing time
287
+ # Confirm with user before proceeding due to potential slow
288
+ # processing time
548
289
  confirmation_message = (
549
290
  f"Do you want to analyze the video on the current "
550
291
  f"page({current_url})? This operation may take a long time.(y/n): "
@@ -555,7 +296,10 @@ class BaseBrowser:
555
296
  return "User cancelled the video analysis."
556
297
 
557
298
  model = None
558
- if hasattr(self, 'web_agent_model'):
299
+ if (
300
+ hasattr(self, 'web_agent_model')
301
+ and self.web_agent_model is not None
302
+ ):
559
303
  model = self.web_agent_model
560
304
 
561
305
  video_analyzer = VideoAnalysisToolkit(model=model)
@@ -577,7 +321,7 @@ class BaseBrowser:
577
321
  image and the path to the image file if saved, otherwise
578
322
  :obj:`None`.
579
323
  """
580
-
324
+ assert self.page is not None
581
325
  image_data = self.page.screenshot(timeout=60000)
582
326
  image = Image.open(io.BytesIO(image_data))
583
327
 
@@ -585,6 +329,7 @@ class BaseBrowser:
585
329
  if save_image:
586
330
  # Get url name to form a file name
587
331
  # Use urlparse for a safer extraction of the URL name
332
+ assert self.page_url is not None
588
333
  parsed_url = urllib.parse.urlparse(self.page_url)
589
334
  # Max length is set to 241 as there are 10 characters for the
590
335
  # timestamp and 4 characters for the file extension:
@@ -612,17 +357,24 @@ class BaseBrowser:
612
357
  Returns:
613
358
  List[str]: A list of paths to the screenshot files.
614
359
  """
615
- screenshots = []
616
- scroll_height = self.page.evaluate("document.body.scrollHeight")
360
+ screenshots: List[str] = [] # Ensure screenshots is typed
361
+ assert self.page is not None
362
+ scroll_height_eval = self.page.evaluate("document.body.scrollHeight")
363
+ scroll_height = cast(
364
+ float, scroll_height_eval
365
+ ) # Ensure scroll_height is
366
+ # float
367
+
617
368
  assert self.page.viewport_size is not None
618
369
  viewport_height = self.page.viewport_size["height"]
619
- current_scroll = 0
620
- screenshot_index = 1
370
+ current_scroll_eval = self.page.evaluate("window.scrollY")
371
+ current_scroll = cast(float, current_scroll_eval)
372
+ # screenshot_index = 1 # This variable is not used
621
373
 
622
374
  max_height = scroll_height - viewport_height
623
375
  scroll_step = int(viewport_height * scroll_ratio)
624
376
 
625
- last_height = 0
377
+ last_height = 0.0 # Initialize last_height as float
626
378
 
627
379
  while True:
628
380
  logger.debug(
@@ -631,19 +383,22 @@ class BaseBrowser:
631
383
  )
632
384
 
633
385
  _, file_path = self.get_screenshot(save_image=True)
634
- screenshots.append(file_path)
386
+ if file_path is not None: # Ensure file_path is not None before
387
+ # appending
388
+ screenshots.append(file_path)
635
389
 
636
390
  self.page.evaluate(f"window.scrollBy(0, {scroll_step})")
637
391
  # Allow time for content to load
638
392
  time.sleep(0.5)
639
393
 
640
- current_scroll = self.page.evaluate("window.scrollY")
394
+ current_scroll_eval = self.page.evaluate("window.scrollY")
395
+ current_scroll = cast(float, current_scroll_eval)
641
396
  # Break if there is no significant scroll
642
397
  if abs(current_scroll - last_height) < viewport_height * 0.1:
643
398
  break
644
399
 
645
400
  last_height = current_scroll
646
- screenshot_index += 1
401
+ # screenshot_index += 1 # This variable is not used
647
402
 
648
403
  return screenshots
649
404
 
@@ -653,13 +408,17 @@ class BaseBrowser:
653
408
  Returns:
654
409
  VisualViewport: The visual viewport of the current page.
655
410
  """
411
+ assert self.page is not None
656
412
  try:
657
413
  self.page.evaluate(self.page_script)
658
414
  except Exception as e:
659
415
  logger.warning(f"Error evaluating page script: {e}")
660
416
 
417
+ visual_viewport_eval = self.page.evaluate(
418
+ "MultimodalWebSurfer.getVisualViewport();"
419
+ )
661
420
  return visual_viewport_from_dict(
662
- self.page.evaluate("MultimodalWebSurfer.getVisualViewport();")
421
+ cast(Dict[str, Any], visual_viewport_eval)
663
422
  )
664
423
 
665
424
  def get_interactive_elements(self) -> Dict[str, InteractiveRegion]:
@@ -668,6 +427,7 @@ class BaseBrowser:
668
427
  Returns:
669
428
  Dict[str, InteractiveRegion]: A dictionary of interactive elements.
670
429
  """
430
+ assert self.page is not None
671
431
  try:
672
432
  self.page.evaluate(self.page_script)
673
433
  except Exception as e:
@@ -682,7 +442,7 @@ class BaseBrowser:
682
442
  for k in result:
683
443
  typed_results[k] = interactive_region_from_dict(result[k])
684
444
 
685
- return typed_results # type: ignore[return-value]
445
+ return typed_results
686
446
 
687
447
  def get_som_screenshot(
688
448
  self,
@@ -696,7 +456,8 @@ class BaseBrowser:
696
456
  directory.
697
457
 
698
458
  Returns:
699
- Tuple[Image.Image, Union[str, None]]: A tuple containing the screenshot image
459
+ Tuple[Image.Image, Union[str, None]]: A tuple containing the
460
+ screenshot image
700
461
  and an optional path to the image file if saved, otherwise
701
462
  :obj:`None`.
702
463
  """
@@ -706,11 +467,12 @@ class BaseBrowser:
706
467
  rects = self.get_interactive_elements()
707
468
 
708
469
  file_path: str | None = None
709
- comp, _, _, _ = add_set_of_mark(
470
+ comp, _, _, _ = _add_set_of_mark(
710
471
  screenshot,
711
- rects, # type: ignore[arg-type]
472
+ rects,
712
473
  )
713
474
  if save_image:
475
+ assert self.page_url is not None
714
476
  parsed_url = urllib.parse.urlparse(self.page_url)
715
477
  # Max length is set to 241 as there are 10 characters for the
716
478
  # timestamp and 4 characters for the file extension:
@@ -727,25 +489,30 @@ class BaseBrowser:
727
489
 
728
490
  def scroll_up(self) -> None:
729
491
  r"""Scroll up the page."""
492
+ assert self.page is not None
730
493
  self.page.keyboard.press("PageUp")
731
494
 
732
495
  def scroll_down(self) -> None:
733
496
  r"""Scroll down the page."""
497
+ assert self.page is not None
734
498
  self.page.keyboard.press("PageDown")
735
499
 
736
500
  def get_url(self) -> str:
737
501
  r"""Get the URL of the current page."""
502
+ assert self.page is not None
738
503
  return self.page.url
739
504
 
740
505
  def click_id(self, identifier: Union[str, int]) -> None:
741
506
  r"""Click an element with the given identifier."""
507
+ assert self.page is not None
742
508
  if isinstance(identifier, int):
743
509
  identifier = str(identifier)
744
510
  target = self.page.locator(f"[__elementId='{identifier}']")
745
511
 
746
512
  try:
747
513
  target.wait_for(timeout=5000)
748
- except (TimeoutError, Exception) as e:
514
+ except Exception as e: # Consider using playwright specific
515
+ # TimeoutError
749
516
  logger.debug(f"Error during click operation: {e}")
750
517
  raise ValueError("No such element.") from None
751
518
 
@@ -754,7 +521,13 @@ class BaseBrowser:
754
521
  new_page = None
755
522
  try:
756
523
  with self.page.expect_event("popup", timeout=1000) as page_info:
757
- box = cast(Dict[str, Union[int, float]], target.bounding_box())
524
+ box: Optional[FloatRect] = target.bounding_box()
525
+ if box is None:
526
+ logger.warning(
527
+ f"Bounding box not found for element '{identifier}'. "
528
+ f"Cannot click."
529
+ )
530
+ return
758
531
  self.page.mouse.click(
759
532
  box["x"] + box["width"] / 2, box["y"] + box["height"] / 2
760
533
  )
@@ -765,7 +538,8 @@ class BaseBrowser:
765
538
  self.page_history.append(deepcopy(self.page.url))
766
539
  self.page = new_page
767
540
 
768
- except (TimeoutError, Exception) as e:
541
+ except Exception as e: # Consider using playwright specific
542
+ # TimeoutError
769
543
  logger.debug(f"Error during click operation: {e}")
770
544
  pass
771
545
 
@@ -773,6 +547,7 @@ class BaseBrowser:
773
547
 
774
548
  def extract_url_content(self) -> str:
775
549
  r"""Extract the content of the current page."""
550
+ assert self.page is not None
776
551
  content = self.page.content()
777
552
  return content
778
553
 
@@ -781,17 +556,17 @@ class BaseBrowser:
781
556
 
782
557
  Args:
783
558
  identifier (str): The identifier of the file to download.
784
- file_path (str): The path to save the downloaded file.
785
559
 
786
560
  Returns:
787
561
  str: The result of the action.
788
562
  """
789
-
563
+ assert self.page is not None
790
564
  if isinstance(identifier, int):
791
565
  identifier = str(identifier)
792
566
  try:
793
567
  target = self.page.locator(f"[__elementId='{identifier}']")
794
- except (TimeoutError, Exception) as e:
568
+ except Exception as e: # Consider using playwright specific
569
+ # TimeoutError
795
570
  logger.debug(f"Error during download operation: {e}")
796
571
  logger.warning(
797
572
  f"Element with identifier '{identifier}' not found."
@@ -800,7 +575,7 @@ class BaseBrowser:
800
575
 
801
576
  target.scroll_into_view_if_needed()
802
577
 
803
- file_path = os.path.join(self.cache_dir)
578
+ file_path_val = os.path.join(self.cache_dir)
804
579
  self._wait_for_load()
805
580
 
806
581
  try:
@@ -809,12 +584,13 @@ class BaseBrowser:
809
584
  download = download_info.value
810
585
  file_name = download.suggested_filename
811
586
 
812
- file_path = os.path.join(file_path, file_name)
813
- download.save_as(file_path)
587
+ file_path_val = os.path.join(file_path_val, file_name)
588
+ download.save_as(file_path_val)
814
589
 
815
- return f"Downloaded file to path '{file_path}'."
590
+ return f"Downloaded file to path '{file_path_val}'."
816
591
 
817
- except (TimeoutError, Exception) as e:
592
+ except Exception as e: # Consider using playwright specific
593
+ # TimeoutError
818
594
  logger.debug(f"Error during download operation: {e}")
819
595
  return f"Failed to download file with identifier '{identifier}'."
820
596
 
@@ -828,12 +604,14 @@ class BaseBrowser:
828
604
  Returns:
829
605
  str: The result of the action.
830
606
  """
607
+ assert self.page is not None
831
608
  if isinstance(identifier, int):
832
609
  identifier = str(identifier)
833
610
 
834
611
  try:
835
612
  target = self.page.locator(f"[__elementId='{identifier}']")
836
- except (TimeoutError, Exception) as e:
613
+ except Exception as e: # Consider using playwright specific
614
+ # TimeoutError
837
615
  logger.debug(f"Error during fill operation: {e}")
838
616
  logger.warning(
839
617
  f"Element with identifier '{identifier}' not found."
@@ -844,7 +622,8 @@ class BaseBrowser:
844
622
  target.focus()
845
623
  try:
846
624
  target.fill(text)
847
- except (TimeoutError, Exception) as e:
625
+ except Exception as e: # Consider using playwright specific
626
+ # TimeoutError
848
627
  logger.debug(f"Error during fill operation: {e}")
849
628
  target.press_sequentially(text)
850
629
 
@@ -856,11 +635,13 @@ class BaseBrowser:
856
635
  )
857
636
 
858
637
  def scroll_to_bottom(self) -> str:
638
+ assert self.page is not None
859
639
  self.page.evaluate("window.scrollTo(0, document.body.scrollHeight);")
860
640
  self._wait_for_load()
861
641
  return "Scrolled to the bottom of the page."
862
642
 
863
643
  def scroll_to_top(self) -> str:
644
+ assert self.page is not None
864
645
  self.page.evaluate("window.scrollTo(0, 0);")
865
646
  self._wait_for_load()
866
647
  return "Scrolled to the top of the page."
@@ -874,11 +655,13 @@ class BaseBrowser:
874
655
  Returns:
875
656
  str: The result of the action.
876
657
  """
658
+ assert self.page is not None
877
659
  if isinstance(identifier, int):
878
660
  identifier = str(identifier)
879
661
  try:
880
662
  target = self.page.locator(f"[__elementId='{identifier}']")
881
- except (TimeoutError, Exception) as e:
663
+ except Exception as e: # Consider using playwright specific
664
+ # TimeoutError
882
665
  logger.debug(f"Error during hover operation: {e}")
883
666
  logger.warning(
884
667
  f"Element with identifier '{identifier}' not found."
@@ -895,16 +678,18 @@ class BaseBrowser:
895
678
  targeted text. It is equivalent to pressing Ctrl + F and searching for
896
679
  the text.
897
680
  """
898
- # ruff: noqa: E501
681
+ assert self.page is not None
899
682
  script = f"""
900
683
  (function() {{
901
684
  let text = "{search_text}";
902
685
  let found = window.find(text);
903
686
  if (!found) {{
904
- let elements = document.querySelectorAll("*:not(script):not(style)");
687
+ let elements = document.querySelectorAll("*:not(script):not(
688
+ style)");
905
689
  for (let el of elements) {{
906
690
  if (el.innerText && el.innerText.includes(text)) {{
907
- el.scrollIntoView({{behavior: "smooth", block: "center"}});
691
+ el.scrollIntoView({{behavior: "smooth", block:
692
+ "center"}});
908
693
  el.style.backgroundColor = "yellow";
909
694
  el.style.border = '2px solid red';
910
695
  return true;
@@ -915,7 +700,8 @@ class BaseBrowser:
915
700
  return true;
916
701
  }})();
917
702
  """
918
- found = self.page.evaluate(script)
703
+ found_eval = self.page.evaluate(script)
704
+ found = cast(bool, found_eval) # Ensure found is bool
919
705
  self._wait_for_load()
920
706
  if found:
921
707
  return f"Found text '{search_text}' on the page."
@@ -924,7 +710,7 @@ class BaseBrowser:
924
710
 
925
711
  def back(self):
926
712
  r"""Navigate back to the previous page."""
927
-
713
+ assert self.page is not None
928
714
  page_url_before = self.page.url
929
715
  self.page.go_back()
930
716
 
@@ -942,15 +728,24 @@ class BaseBrowser:
942
728
  self._wait_for_load()
943
729
 
944
730
  def close(self):
945
- self.browser.close()
731
+ if self.context is not None:
732
+ self.context.close()
733
+ if (
734
+ self.browser is not None
735
+ ): # Only close browser if it was launched separately
736
+ self.browser.close()
737
+ if self.playwright:
738
+ self.playwright.stop() # Stop playwright instance
946
739
 
947
- # ruff: noqa: E501
948
740
  def show_interactive_elements(self):
949
741
  r"""Show simple interactive elements on the current page."""
742
+ assert self.page is not None
950
743
  self.page.evaluate(self.page_script)
951
744
  self.page.evaluate("""
952
745
  () => {
953
- document.querySelectorAll('a, button, input, select, textarea, [tabindex]:not([tabindex="-1"]), [contenteditable="true"]').forEach(el => {
746
+ document.querySelectorAll('a, button, input, select, textarea,
747
+ [tabindex]:not([tabindex="-1"]),
748
+ [contenteditable="true"]').forEach(el => {
954
749
  el.style.border = '2px solid red';
955
750
  });
956
751
  }
@@ -960,6 +755,7 @@ class BaseBrowser:
960
755
  def get_webpage_content(self) -> str:
961
756
  from html2text import html2text
962
757
 
758
+ assert self.page is not None
963
759
  self._wait_for_load()
964
760
  html_content = self.page.content()
965
761
 
@@ -1026,11 +822,15 @@ class BrowserToolkit(BaseToolkit):
1026
822
  planning_agent_model: Optional[BaseModelBackend] = None,
1027
823
  output_language: str = "en",
1028
824
  cookie_json_path: Optional[str] = None,
825
+ user_data_dir: Optional[str] = None,
1029
826
  ):
1030
827
  r"""Initialize the BrowserToolkit instance.
1031
828
 
1032
829
  Args:
1033
830
  headless (bool): Whether to run the browser in headless mode.
831
+ When running inside a CAMEL runtime container, this is
832
+ automatically set to True since containers typically don't
833
+ have a display.
1034
834
  cache_dir (Union[str, None]): The directory to store cache files.
1035
835
  channel (Literal["chrome", "msedge", "chromium"]): The browser
1036
836
  channel to use. Must be one of "chrome", "msedge", or
@@ -1045,25 +845,47 @@ class BrowserToolkit(BaseToolkit):
1045
845
  (default: :obj:`"en`")
1046
846
  cookie_json_path (Optional[str]): Path to a JSON file containing
1047
847
  authentication cookies and browser storage state. If provided
1048
- and the file exists, the browser will load this state to maintain
848
+ and the file exists, the browser will load this state to
849
+ maintain
1049
850
  authenticated sessions without requiring manual login.
1050
851
  (default: :obj:`None`)
852
+ user_data_dir (Optional[str]): The directory to store user data
853
+ for persistent context. If None, a fresh browser instance
854
+ is used without saving data. (default: :obj:`None`)
1051
855
  """
856
+ super().__init__() # Call to super().__init__() added
857
+
858
+ # auto-detect if running inside a CAMEL runtime container
859
+ # force headless mode since containers typically don't have a display
860
+ in_runtime = os.environ.get("CAMEL_RUNTIME", "").lower() == "true"
861
+ if in_runtime and not headless:
862
+ logger.info(
863
+ "Detected CAMEL_RUNTIME environment - enabling headless mode "
864
+ "since containers typically don't have a display"
865
+ )
866
+ headless = True
1052
867
 
1053
868
  self.browser = BaseBrowser(
1054
869
  headless=headless,
1055
870
  cache_dir=cache_dir,
1056
871
  channel=channel,
1057
872
  cookie_json_path=cookie_json_path,
873
+ user_data_dir=user_data_dir,
1058
874
  )
875
+ self.browser.web_agent_model = web_agent_model # Pass model to
876
+ # BaseBrowser instance
1059
877
 
1060
878
  self.history_window = history_window
1061
879
  self.web_agent_model = web_agent_model
1062
880
  self.planning_agent_model = planning_agent_model
1063
881
  self.output_language = output_language
1064
882
 
1065
- self.history: list = []
1066
- self.web_agent, self.planning_agent = self._initialize_agent()
883
+ self.history: List[Dict[str, Any]] = [] # Typed history list
884
+ self.web_agent: ChatAgent
885
+ self.planning_agent: ChatAgent
886
+ self.web_agent, self.planning_agent = self._initialize_agent(
887
+ web_agent_model, planning_agent_model
888
+ )
1067
889
 
1068
890
  def _reset(self):
1069
891
  self.web_agent.reset()
@@ -1071,43 +893,40 @@ class BrowserToolkit(BaseToolkit):
1071
893
  self.history = []
1072
894
  os.makedirs(self.browser.cache_dir, exist_ok=True)
1073
895
 
1074
- def _initialize_agent(self) -> Tuple["ChatAgent", "ChatAgent"]:
896
+ def _initialize_agent(
897
+ self,
898
+ web_agent_model_backend: Optional[BaseModelBackend],
899
+ planning_agent_model_backend: Optional[BaseModelBackend],
900
+ ) -> Tuple[ChatAgent, ChatAgent]:
1075
901
  r"""Initialize the agent."""
1076
902
  from camel.agents import ChatAgent
1077
903
 
1078
- if self.web_agent_model is None:
1079
- web_agent_model = ModelFactory.create(
1080
- model_platform=ModelPlatformType.OPENAI,
1081
- model_type=ModelType.GPT_4_1,
904
+ if web_agent_model_backend is None:
905
+ web_agent_model_instance = ModelFactory.create(
906
+ model_platform=ModelPlatformType.DEFAULT,
907
+ model_type=ModelType.DEFAULT,
1082
908
  model_config_dict={"temperature": 0, "top_p": 1},
1083
909
  )
1084
910
  else:
1085
- web_agent_model = self.web_agent_model
911
+ web_agent_model_instance = web_agent_model_backend
1086
912
 
1087
- if self.planning_agent_model is None:
913
+ if planning_agent_model_backend is None:
1088
914
  planning_model = ModelFactory.create(
1089
- model_platform=ModelPlatformType.OPENAI,
1090
- model_type=ModelType.O3_MINI,
915
+ model_platform=ModelPlatformType.DEFAULT,
916
+ model_type=ModelType.DEFAULT,
1091
917
  )
1092
918
  else:
1093
- planning_model = self.planning_agent_model
919
+ planning_model = planning_agent_model_backend
1094
920
 
1095
- system_prompt = """
1096
- You are a helpful web agent that can assist users in browsing the web.
1097
- Given a high-level task, you can leverage predefined browser tools to help
1098
- users achieve their goals.
1099
- """
921
+ system_prompt = WEB_AGENT_SYSTEM_PROMPT
1100
922
 
1101
923
  web_agent = ChatAgent(
1102
924
  system_message=system_prompt,
1103
- model=web_agent_model,
925
+ model=web_agent_model_instance,
1104
926
  output_language=self.output_language,
1105
927
  )
1106
928
 
1107
- planning_system_prompt = """
1108
- You are a helpful planning agent that can assist users in planning complex
1109
- tasks which need multi-step browser interaction.
1110
- """
929
+ planning_system_prompt = PLANNING_AGENT_SYSTEM_PROMPT
1111
930
 
1112
931
  planning_agent = ChatAgent(
1113
932
  system_message=planning_system_prompt,
@@ -1120,96 +939,24 @@ tasks which need multi-step browser interaction.
1120
939
  def _observe(
1121
940
  self, task_prompt: str, detailed_plan: Optional[str] = None
1122
941
  ) -> Tuple[str, str, str]:
1123
- r"""Let agent observe the current environment, and get the next action."""
942
+ r"""Let agent observe the current environment, and get the next
943
+ action."""
1124
944
 
1125
- detailed_plan_prompt = ""
945
+ detailed_plan_prompt_str = ""
1126
946
 
1127
947
  if detailed_plan is not None:
1128
- detailed_plan_prompt = f"""
948
+ detailed_plan_prompt_str = f"""
1129
949
  Here is a plan about how to solve the task step-by-step which you must follow:
1130
950
  <detailed_plan>{detailed_plan}<detailed_plan>
1131
951
  """
1132
952
 
1133
- observe_prompt = f"""
1134
- Please act as a web agent to help me complete the following high-level task:
1135
- <task>{task_prompt}</task>
1136
- Now, I have made screenshot (only the current viewport, not the full webpage)
1137
- based on the current browser state, and marked interactive elements in the
1138
- webpage.
1139
- Please carefully examine the requirements of the task, and current state of
1140
- the browser, and provide the next appropriate action to take.
1141
-
1142
- {detailed_plan_prompt}
1143
-
1144
- Here are the current available browser functions you can use:
1145
- {AVAILABLE_ACTIONS_PROMPT}
1146
-
1147
- Here are the latest {self.history_window} trajectory (at most) you have taken:
1148
- <history>
1149
- {self.history[-self.history_window :]}
1150
- </history>
1151
-
1152
- Your output should be in json format, including the following fields:
1153
- - `observation`: The detailed image description about the current viewport. Do
1154
- not over-confident about the correctness of the history actions. You should
1155
- always check the current viewport to make sure the correctness of the next
1156
- action.
1157
- - `reasoning`: The reasoning about the next action you want to take, and the
1158
- possible obstacles you may encounter, and how to solve them. Do not forget to
1159
- check the history actions to avoid the same mistakes.
1160
- - `action_code`: The action code you want to take. It is only one step action
1161
- code, without any other texts (such as annotation)
1162
-
1163
- Here is two example of the output:
1164
- ```json
1165
- {{
1166
- "observation": [IMAGE_DESCRIPTION],
1167
- "reasoning": [YOUR_REASONING],
1168
- "action_code": "fill_input_id([ID], [TEXT])"
1169
- }}
1170
-
1171
- {{
1172
- "observation": "The current page is a CAPTCHA verification page on Amazon. It asks the user to ..",
1173
- "reasoning": "To proceed with the task of searching for products, I need to complete..",
1174
- "action_code": "fill_input_id(3, 'AUXPMR')"
1175
- }}
1176
-
1177
- Here are some tips for you:
1178
- - Never forget the overall question: **{task_prompt}**
1179
- - Maybe after a certain operation (e.g. click_id), the page content has not
1180
- changed. You can check whether the action step is successful by looking at the
1181
- `success` of the action step in the history. If successful, it means that the
1182
- page content is indeed the same after the click. You need to try other methods.
1183
- - If using one way to solve the problem is not successful, try other ways.
1184
- Make sure your provided ID is correct!
1185
- - Some cases are very complex and need to be achieve by an iterative process.
1186
- You can use the `back()` function to go back to the previous page to try other
1187
- methods.
1188
- - There are many links on the page, which may be useful for solving the
1189
- problem. You can use the `click_id()` function to click on the link to see if
1190
- it is useful.
1191
- - Always keep in mind that your action must be based on the ID shown in the
1192
- current image or viewport, not the ID shown in the history.
1193
- - Do not use `stop()` lightly. Always remind yourself that the image only
1194
- shows a part of the full page. If you cannot find the answer, try to use
1195
- functions like `scroll_up()` and `scroll_down()` to check the full content of
1196
- the webpage before doing anything else, because the answer or next key step
1197
- may be hidden in the content below.
1198
- - If the webpage needs human verification, you must avoid processing it.
1199
- Please use `back()` to go back to the previous page, and try other ways.
1200
- - If you have tried everything and still cannot resolve the issue, please stop
1201
- the simulation, and report issues you have encountered.
1202
- - Check the history actions carefully, detect whether you have repeatedly made
1203
- the same actions or not.
1204
- - When dealing with wikipedia revision history related tasks, you need to
1205
- think about the solution flexibly. First, adjust the browsing history
1206
- displayed on a single page to the maximum, and then make use of the
1207
- find_text_on_page function. This is extremely useful which can quickly locate
1208
- the text you want to find and skip massive amount of useless information.
1209
- - Flexibly use interactive elements like slide down selection bar to filter
1210
- out the information you need. Sometimes they are extremely useful.
1211
- ```
1212
- """
953
+ observe_prompt = OBSERVE_PROMPT_TEMPLATE.format(
954
+ task_prompt=task_prompt,
955
+ detailed_plan_prompt=detailed_plan_prompt_str,
956
+ AVAILABLE_ACTIONS_PROMPT=AVAILABLE_ACTIONS_PROMPT,
957
+ history_window=self.history_window,
958
+ history=self.history[-self.history_window :],
959
+ )
1213
960
 
1214
961
  # get current state
1215
962
  som_screenshot, _ = self.browser.get_som_screenshot(save_image=True)
@@ -1223,7 +970,8 @@ out the information you need. Sometimes they are extremely useful.
1223
970
 
1224
971
  resp_content = resp.msgs[0].content
1225
972
 
1226
- resp_dict = _parse_json_output(resp_content)
973
+ resp_dict = _parse_json_output(resp_content, logger) # Pass logger to
974
+ # _parse_json_output
1227
975
  observation_result: str = resp_dict.get("observation", "")
1228
976
  reasoning_result: str = resp_dict.get("reasoning", "")
1229
977
  action_code: str = resp_dict.get("action_code", "")
@@ -1244,7 +992,10 @@ out the information you need. Sometimes they are extremely useful.
1244
992
  id_part = (
1245
993
  parts[0].replace("fill_input_id(", "").strip()
1246
994
  )
1247
- action_code = f"fill_input_id({id_part}, 'Please fill the text here.')"
995
+ action_code = (
996
+ f"fill_input_id({id_part}, 'Please "
997
+ f"fill the text here.')"
998
+ )
1248
999
 
1249
1000
  action_code = action_code.replace("`", "").strip()
1250
1001
 
@@ -1346,43 +1097,36 @@ out the information you need. Sometimes they are extremely useful.
1346
1097
  )
1347
1098
 
1348
1099
  def _get_final_answer(self, task_prompt: str) -> str:
1349
- r"""Get the final answer based on the task prompt and current browser state.
1350
- It is used when the agent thinks that the task can be completed without any further action, and answer can be directly found in the current viewport.
1100
+ r"""Get the final answer based on the task prompt and current
1101
+ browser state.
1102
+ It is used when the agent thinks that the task can be completed
1103
+ without any further action, and answer can be directly found in the
1104
+ current viewport.
1351
1105
  """
1352
1106
 
1353
- prompt = f"""
1354
- We are solving a complex web task which needs multi-step browser interaction. After the multi-step observation, reasoning and acting with web browser, we think that the task is currently solved.
1355
- Here are all trajectory we have taken:
1356
- <history>{self.history}</history>
1357
- Please find the final answer, or give valuable insights and founds (e.g. if previous actions contain downloading files, your output should include the path of the downloaded file) about the overall task: <task>{task_prompt}</task>
1358
- """
1107
+ prompt = GET_FINAL_ANSWER_PROMPT_TEMPLATE.format(
1108
+ history=self.history, task_prompt=task_prompt
1109
+ )
1359
1110
 
1360
1111
  message = BaseMessage.make_user_message(
1361
1112
  role_name='user',
1362
1113
  content=prompt,
1363
1114
  )
1364
-
1115
+ self.web_agent.reset() # Reset before step
1365
1116
  resp = self.web_agent.step(message)
1366
1117
  return resp.msgs[0].content
1367
1118
 
1368
1119
  def _task_planning(self, task_prompt: str, start_url: str) -> str:
1369
1120
  r"""Plan the task based on the given task prompt."""
1370
1121
 
1371
- # Here are the available browser functions we can use: {AVAILABLE_ACTIONS_PROMPT}
1372
-
1373
- planning_prompt = f"""
1374
- <task>{task_prompt}</task>
1375
- According to the problem above, if we use browser interaction, what is the general process of the interaction after visiting the webpage `{start_url}`?
1376
-
1377
- Please note that it can be viewed as Partially Observable MDP. Do not over-confident about your plan.
1378
- Please first restate the task in detail, and then provide a detailed plan to solve the task.
1379
- """
1380
- # Here are some tips for you: Please note that we can only see a part of the full page because of the limited viewport after an action. Thus, do not forget to use methods like `scroll_up()` and `scroll_down()` to check the full content of the webpage, because the answer or next key step may be hidden in the content below.
1122
+ planning_prompt = TASK_PLANNING_PROMPT_TEMPLATE.format(
1123
+ task_prompt=task_prompt, start_url=start_url
1124
+ )
1381
1125
 
1382
1126
  message = BaseMessage.make_user_message(
1383
1127
  role_name='user', content=planning_prompt
1384
1128
  )
1385
-
1129
+ self.planning_agent.reset() # Reset before step
1386
1130
  resp = self.planning_agent.step(message)
1387
1131
  return resp.msgs[0].content
1388
1132
 
@@ -1396,35 +1140,26 @@ Please first restate the task in detail, and then provide a detailed plan to sol
1396
1140
  detailed_plan (str): The detailed plan to replan.
1397
1141
 
1398
1142
  Returns:
1399
- Tuple[bool, str]: A tuple containing a boolean indicating whether the task needs to be replanned, and the replanned schema.
1143
+ Tuple[bool, str]: A tuple containing a boolean indicating
1144
+ whether the task needs to be replanned, and the replanned schema.
1400
1145
  """
1401
1146
 
1402
- # Here are the available browser functions we can use: {AVAILABLE_ACTIONS_PROMPT}
1403
- replanning_prompt = f"""
1404
- We are using browser interaction to solve a complex task which needs multi-step actions.
1405
- Here are the overall task:
1406
- <overall_task>{task_prompt}</overall_task>
1407
-
1408
- In order to solve the task, we made a detailed plan previously. Here is the detailed plan:
1409
- <detailed plan>{detailed_plan}</detailed plan>
1410
-
1411
- According to the task above, we have made a series of observations, reasonings, and actions. Here are the latest {self.history_window} trajectory (at most) we have taken:
1412
- <history>{self.history[-self.history_window :]}</history>
1413
-
1414
- However, the task is not completed yet. As the task is partially observable, we may need to replan the task based on the current state of the browser if necessary.
1415
- Now please carefully examine the current task planning schema, and our history actions, and then judge whether the task needs to be fundamentally replanned. If so, please provide a detailed replanned schema (including the restated overall task).
1416
-
1417
- Your output should be in json format, including the following fields:
1418
- - `if_need_replan`: bool, A boolean value indicating whether the task needs to be fundamentally replanned.
1419
- - `replanned_schema`: str, The replanned schema for the task, which should not be changed too much compared with the original one. If the task does not need to be replanned, the value should be an empty string.
1420
- """
1147
+ replanning_prompt = TASK_REPLANNING_PROMPT_TEMPLATE.format(
1148
+ task_prompt=task_prompt,
1149
+ detailed_plan=detailed_plan,
1150
+ history_window=self.history_window,
1151
+ history=self.history[-self.history_window :],
1152
+ )
1421
1153
  # Reset the history message of planning_agent.
1422
1154
  self.planning_agent.reset()
1423
1155
  resp = self.planning_agent.step(replanning_prompt)
1424
- resp_dict = _parse_json_output(resp.msgs[0].content)
1156
+ resp_dict = _parse_json_output(
1157
+ resp.msgs[0].content, logger
1158
+ ) # Pass logger
1425
1159
 
1426
- if_need_replan = resp_dict.get("if_need_replan", False)
1427
- replanned_schema = resp_dict.get("replanned_schema", "")
1160
+ if_need_replan_eval = resp_dict.get("if_need_replan", False)
1161
+ if_need_replan = cast(bool, if_need_replan_eval) # Ensure bool
1162
+ replanned_schema: str = resp_dict.get("replanned_schema", "")
1428
1163
 
1429
1164
  if if_need_replan:
1430
1165
  return True, replanned_schema
@@ -1463,10 +1198,10 @@ Your output should be in json format, including the following fields:
1463
1198
  logger.debug(f"Observation: {observation}")
1464
1199
  logger.debug(f"Reasoning: {reasoning}")
1465
1200
  logger.debug(f"Action code: {action_code}")
1466
-
1201
+ trajectory_info: Dict[str, Any]
1467
1202
  if "stop" in action_code:
1468
1203
  task_completed = True
1469
- trajectory_info = {
1204
+ trajectory_info = { # Typed trajectory_info
1470
1205
  "round": i,
1471
1206
  "observation": observation,
1472
1207
  "thought": reasoning,
@@ -1483,7 +1218,7 @@ Your output should be in json format, including the following fields:
1483
1218
  if not success:
1484
1219
  logger.warning(f"Error while executing the action: {info}")
1485
1220
 
1486
- trajectory_info = {
1221
+ trajectory_info = { # Typed trajectory_info
1487
1222
  "round": i,
1488
1223
  "observation": observation,
1489
1224
  "thought": reasoning,
@@ -1502,15 +1237,20 @@ Your output should be in json format, including the following fields:
1502
1237
  detailed_plan = replanned_schema
1503
1238
  logger.debug(f"Replanned schema: {replanned_schema}")
1504
1239
 
1240
+ simulation_result: str
1505
1241
  if not task_completed:
1506
1242
  simulation_result = f"""
1507
- The task is not completed within the round limit. Please check the last round {self.history_window} information to see if there is any useful information:
1243
+ The task is not completed within the round limit. Please
1244
+ check the last round {self.history_window} information to
1245
+ see if there is any useful information:
1508
1246
  <history>{self.history[-self.history_window :]}</history>
1509
1247
  """
1510
1248
 
1511
1249
  else:
1512
1250
  simulation_result = self._get_final_answer(task_prompt)
1513
1251
 
1252
+ self.browser.close() # Close browser after task completion or limit
1253
+ # reached
1514
1254
  return simulation_result
1515
1255
 
1516
1256
  def get_tools(self) -> List[FunctionTool]: