camel-ai 0.2.59__py3-none-any.whl → 0.2.82__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +3 -3
- camel/agents/__init__.py +2 -2
- camel/agents/_types.py +9 -4
- camel/agents/_utils.py +40 -2
- camel/agents/base.py +2 -2
- camel/agents/chat_agent.py +5012 -902
- camel/agents/critic_agent.py +2 -2
- camel/agents/deductive_reasoner_agent.py +56 -56
- camel/agents/embodied_agent.py +2 -2
- camel/agents/knowledge_graph_agent.py +20 -20
- camel/agents/mcp_agent.py +39 -36
- camel/agents/multi_hop_generator_agent.py +3 -3
- camel/agents/programmed_agent_instruction.py +2 -2
- camel/agents/repo_agent.py +4 -3
- camel/agents/role_assignment_agent.py +2 -2
- camel/agents/search_agent.py +2 -2
- camel/agents/task_agent.py +2 -2
- camel/agents/tool_agents/__init__.py +2 -2
- camel/agents/tool_agents/base.py +2 -2
- camel/agents/tool_agents/hugging_face_tool_agent.py +3 -3
- camel/benchmarks/__init__.py +2 -2
- camel/benchmarks/apibank.py +5 -5
- camel/benchmarks/apibench.py +2 -2
- camel/benchmarks/base.py +2 -2
- camel/benchmarks/browsecomp.py +44 -33
- camel/benchmarks/gaia.py +17 -13
- camel/benchmarks/mock_website/README.md +94 -0
- camel/benchmarks/mock_website/mock_web.py +299 -0
- camel/benchmarks/mock_website/requirements.txt +3 -0
- camel/benchmarks/mock_website/shopping_mall/app.py +465 -0
- camel/benchmarks/mock_website/task.json +104 -0
- camel/benchmarks/nexus.py +3 -3
- camel/benchmarks/ragbench.py +2 -2
- camel/bots/__init__.py +2 -2
- camel/bots/discord/__init__.py +2 -2
- camel/bots/discord/discord_app.py +2 -2
- camel/bots/discord/discord_installation.py +2 -2
- camel/bots/discord/discord_store.py +3 -3
- camel/bots/slack/__init__.py +2 -2
- camel/bots/slack/models.py +4 -4
- camel/bots/slack/slack_app.py +2 -2
- camel/bots/telegram_bot.py +2 -2
- camel/configs/__init__.py +26 -2
- camel/configs/aihubmix_config.py +90 -0
- camel/configs/aiml_config.py +2 -2
- camel/configs/amd_config.py +70 -0
- camel/configs/anthropic_config.py +8 -7
- camel/configs/base_config.py +2 -2
- camel/configs/bedrock_config.py +5 -3
- camel/configs/cerebras_config.py +98 -0
- camel/configs/cohere_config.py +3 -3
- camel/configs/cometapi_config.py +106 -0
- camel/configs/crynux_config.py +94 -0
- camel/configs/deepseek_config.py +9 -8
- camel/configs/gemini_config.py +6 -4
- camel/configs/groq_config.py +6 -4
- camel/configs/internlm_config.py +6 -4
- camel/configs/litellm_config.py +2 -2
- camel/configs/lmstudio_config.py +6 -4
- camel/configs/minimax_config.py +95 -0
- camel/configs/mistral_config.py +3 -3
- camel/configs/modelscope_config.py +5 -3
- camel/configs/moonshot_config.py +2 -2
- camel/configs/nebius_config.py +105 -0
- camel/configs/netmind_config.py +2 -2
- camel/configs/novita_config.py +2 -2
- camel/configs/nvidia_config.py +2 -2
- camel/configs/ollama_config.py +2 -2
- camel/configs/openai_config.py +8 -3
- camel/configs/openrouter_config.py +6 -4
- camel/configs/ppio_config.py +2 -2
- camel/configs/qianfan_config.py +85 -0
- camel/configs/qwen_config.py +2 -2
- camel/configs/reka_config.py +3 -3
- camel/configs/samba_config.py +8 -6
- camel/configs/sglang_config.py +2 -2
- camel/configs/siliconflow_config.py +2 -2
- camel/configs/togetherai_config.py +2 -2
- camel/configs/vllm_config.py +4 -2
- camel/configs/watsonx_config.py +2 -2
- camel/configs/yi_config.py +6 -4
- camel/configs/zhipuai_config.py +6 -4
- camel/{data_collector → data_collectors}/__init__.py +2 -2
- camel/{data_collector → data_collectors}/alpaca_collector.py +19 -10
- camel/{data_collector → data_collectors}/base.py +2 -2
- camel/{data_collector → data_collectors}/sharegpt_collector.py +3 -3
- camel/datagen/__init__.py +2 -2
- camel/datagen/cot_datagen.py +32 -37
- camel/datagen/evol_instruct/__init__.py +2 -2
- camel/datagen/evol_instruct/evol_instruct.py +2 -2
- camel/datagen/evol_instruct/scorer.py +24 -25
- camel/datagen/evol_instruct/templates.py +48 -48
- camel/datagen/self_improving_cot.py +5 -5
- camel/datagen/self_instruct/__init__.py +2 -2
- camel/datagen/self_instruct/filter/__init__.py +2 -2
- camel/datagen/self_instruct/filter/filter_function.py +2 -2
- camel/datagen/self_instruct/filter/filter_registry.py +2 -2
- camel/datagen/self_instruct/filter/instruction_filter.py +2 -2
- camel/datagen/self_instruct/self_instruct.py +2 -2
- camel/datagen/self_instruct/templates.py +47 -47
- camel/datagen/source2synth/__init__.py +2 -2
- camel/datagen/source2synth/data_processor.py +2 -2
- camel/datagen/source2synth/models.py +2 -2
- camel/datagen/source2synth/user_data_processor_config.py +2 -2
- camel/datahubs/__init__.py +2 -2
- camel/datahubs/base.py +2 -2
- camel/datahubs/huggingface.py +2 -2
- camel/datahubs/models.py +2 -2
- camel/datasets/__init__.py +2 -2
- camel/datasets/base_generator.py +41 -12
- camel/datasets/few_shot_generator.py +18 -18
- camel/datasets/models.py +3 -3
- camel/datasets/self_instruct_generator.py +2 -2
- camel/datasets/static_dataset.py +152 -2
- camel/embeddings/__init__.py +2 -2
- camel/embeddings/azure_embedding.py +2 -2
- camel/embeddings/base.py +2 -2
- camel/embeddings/gemini_embedding.py +2 -2
- camel/embeddings/jina_embedding.py +10 -3
- camel/embeddings/mistral_embedding.py +2 -2
- camel/embeddings/openai_compatible_embedding.py +2 -2
- camel/embeddings/openai_embedding.py +2 -2
- camel/embeddings/sentence_transformers_embeddings.py +4 -4
- camel/embeddings/together_embedding.py +2 -2
- camel/embeddings/vlm_embedding.py +11 -4
- camel/environments/__init__.py +14 -2
- camel/environments/models.py +2 -2
- camel/environments/multi_step.py +2 -2
- camel/environments/rlcards_env.py +860 -0
- camel/environments/single_step.py +30 -5
- camel/environments/tic_tac_toe.py +3 -3
- camel/extractors/__init__.py +2 -2
- camel/extractors/base.py +2 -2
- camel/extractors/python_strategies.py +2 -2
- camel/generators.py +2 -2
- camel/human.py +2 -2
- camel/interpreters/__init__.py +4 -2
- camel/interpreters/base.py +16 -3
- camel/interpreters/docker/Dockerfile +53 -7
- camel/interpreters/docker_interpreter.py +70 -11
- camel/interpreters/e2b_interpreter.py +59 -11
- camel/interpreters/internal_python_interpreter.py +81 -4
- camel/interpreters/interpreter_error.py +2 -2
- camel/interpreters/ipython_interpreter.py +23 -5
- camel/interpreters/microsandbox_interpreter.py +395 -0
- camel/interpreters/subprocess_interpreter.py +36 -4
- camel/loaders/__init__.py +17 -5
- camel/loaders/apify_reader.py +2 -2
- camel/loaders/base_io.py +2 -2
- camel/loaders/base_loader.py +85 -0
- camel/loaders/chunkr_reader.py +128 -93
- camel/loaders/crawl4ai_reader.py +2 -2
- camel/loaders/firecrawl_reader.py +6 -6
- camel/loaders/jina_url_reader.py +2 -2
- camel/loaders/markitdown.py +2 -2
- camel/loaders/mineru_extractor.py +2 -2
- camel/loaders/mistral_reader.py +148 -0
- camel/loaders/scrapegraph_reader.py +2 -2
- camel/loaders/unstructured_io.py +2 -2
- camel/logger.py +5 -5
- camel/memories/__init__.py +2 -2
- camel/memories/agent_memories.py +86 -3
- camel/memories/base.py +36 -2
- camel/memories/blocks/__init__.py +2 -2
- camel/memories/blocks/chat_history_block.py +126 -9
- camel/memories/blocks/vectordb_block.py +10 -3
- camel/memories/context_creators/__init__.py +2 -2
- camel/memories/context_creators/score_based.py +31 -239
- camel/memories/records.py +98 -13
- camel/messages/__init__.py +2 -2
- camel/messages/base.py +193 -46
- camel/messages/conversion/__init__.py +2 -2
- camel/messages/conversion/alpaca.py +2 -2
- camel/messages/conversion/conversation_models.py +2 -2
- camel/messages/conversion/sharegpt/__init__.py +2 -2
- camel/messages/conversion/sharegpt/function_call_formatter.py +2 -2
- camel/messages/conversion/sharegpt/hermes/__init__.py +2 -2
- camel/messages/conversion/sharegpt/hermes/hermes_function_formatter.py +2 -2
- camel/messages/func_message.py +54 -17
- camel/models/__init__.py +18 -2
- camel/models/_utils.py +3 -3
- camel/models/aihubmix_model.py +83 -0
- camel/models/aiml_model.py +11 -18
- camel/models/amd_model.py +101 -0
- camel/models/anthropic_model.py +127 -20
- camel/models/aws_bedrock_model.py +12 -35
- camel/models/azure_openai_model.py +263 -63
- camel/models/base_audio_model.py +5 -3
- camel/models/base_model.py +195 -26
- camel/models/cerebras_model.py +83 -0
- camel/models/cohere_model.py +81 -21
- camel/models/cometapi_model.py +83 -0
- camel/models/crynux_model.py +87 -0
- camel/models/deepseek_model.py +61 -59
- camel/models/fish_audio_model.py +8 -2
- camel/models/gemini_model.py +439 -30
- camel/models/groq_model.py +11 -19
- camel/models/internlm_model.py +11 -18
- camel/models/litellm_model.py +94 -34
- camel/models/lmstudio_model.py +17 -20
- camel/models/minimax_model.py +83 -0
- camel/models/mistral_model.py +84 -19
- camel/models/model_factory.py +49 -6
- camel/models/model_manager.py +33 -11
- camel/models/modelscope_model.py +13 -193
- camel/models/moonshot_model.py +195 -21
- camel/models/nebius_model.py +83 -0
- camel/models/nemotron_model.py +19 -9
- camel/models/netmind_model.py +11 -18
- camel/models/novita_model.py +11 -18
- camel/models/nvidia_model.py +11 -18
- camel/models/ollama_model.py +14 -21
- camel/models/openai_audio_models.py +2 -2
- camel/models/openai_compatible_model.py +234 -27
- camel/models/openai_model.py +255 -39
- camel/models/openrouter_model.py +11 -19
- camel/models/ppio_model.py +11 -18
- camel/models/qianfan_model.py +89 -0
- camel/models/qwen_model.py +13 -193
- camel/models/reka_model.py +90 -21
- camel/models/reward/__init__.py +2 -2
- camel/models/reward/base_reward_model.py +2 -2
- camel/models/reward/evaluator.py +2 -2
- camel/models/reward/nemotron_model.py +2 -2
- camel/models/reward/skywork_model.py +2 -2
- camel/models/samba_model.py +117 -49
- camel/models/sglang_model.py +162 -42
- camel/models/siliconflow_model.py +12 -35
- camel/models/stub_model.py +10 -7
- camel/models/togetherai_model.py +11 -18
- camel/models/vllm_model.py +10 -18
- camel/models/volcano_model.py +16 -20
- camel/models/watsonx_model.py +69 -19
- camel/models/yi_model.py +11 -18
- camel/models/zhipuai_model.py +70 -18
- camel/parsers/__init__.py +18 -0
- camel/parsers/mcp_tool_call_parser.py +176 -0
- camel/personas/__init__.py +2 -2
- camel/personas/persona.py +2 -2
- camel/personas/persona_hub.py +2 -2
- camel/prompts/__init__.py +2 -2
- camel/prompts/ai_society.py +2 -2
- camel/prompts/base.py +2 -2
- camel/prompts/code.py +2 -2
- camel/prompts/evaluation.py +2 -2
- camel/prompts/generate_text_embedding_data.py +2 -2
- camel/prompts/image_craft.py +2 -2
- camel/prompts/misalignment.py +2 -2
- camel/prompts/multi_condition_image_craft.py +2 -2
- camel/prompts/object_recognition.py +2 -2
- camel/prompts/persona_hub.py +3 -3
- camel/prompts/prompt_templates.py +2 -2
- camel/prompts/role_description_prompt_template.py +2 -2
- camel/prompts/solution_extraction.py +8 -8
- camel/prompts/task_prompt_template.py +2 -2
- camel/prompts/translation.py +2 -2
- camel/prompts/video_description_prompt.py +3 -3
- camel/responses/__init__.py +2 -2
- camel/responses/agent_responses.py +2 -2
- camel/retrievers/__init__.py +2 -2
- camel/retrievers/auto_retriever.py +23 -3
- camel/retrievers/base.py +2 -2
- camel/retrievers/bm25_retriever.py +3 -4
- camel/retrievers/cohere_rerank_retriever.py +2 -2
- camel/retrievers/hybrid_retrival.py +4 -4
- camel/retrievers/vector_retriever.py +2 -2
- camel/runtimes/Dockerfile.multi-toolkit +90 -0
- camel/{runtime → runtimes}/__init__.py +2 -2
- camel/runtimes/api.py +153 -0
- camel/{runtime → runtimes}/base.py +2 -2
- camel/{runtime → runtimes}/configs.py +13 -13
- camel/{runtime → runtimes}/daytona_runtime.py +18 -19
- camel/{runtime → runtimes}/docker_runtime.py +13 -13
- camel/{runtime → runtimes}/llm_guard_runtime.py +28 -28
- camel/{runtime → runtimes}/remote_http_runtime.py +12 -12
- camel/{runtime → runtimes}/ubuntu_docker_runtime.py +3 -3
- camel/{runtime → runtimes}/utils/__init__.py +2 -2
- camel/{runtime → runtimes}/utils/function_risk_toolkit.py +2 -2
- camel/{runtime → runtimes}/utils/ignore_risk_toolkit.py +2 -2
- camel/schemas/__init__.py +2 -2
- camel/schemas/base.py +2 -2
- camel/schemas/openai_converter.py +3 -3
- camel/schemas/outlines_converter.py +2 -2
- camel/services/agent_openapi_server.py +380 -0
- camel/societies/__init__.py +4 -2
- camel/societies/babyagi_playing.py +2 -2
- camel/societies/role_playing.py +201 -80
- camel/societies/workforce/__init__.py +10 -3
- camel/societies/workforce/base.py +9 -5
- camel/societies/workforce/events.py +143 -0
- camel/societies/workforce/prompts.py +258 -33
- camel/societies/workforce/role_playing_worker.py +95 -30
- camel/societies/workforce/single_agent_worker.py +659 -30
- camel/societies/workforce/structured_output_handler.py +512 -0
- camel/societies/workforce/task_channel.py +182 -38
- camel/societies/workforce/utils.py +784 -18
- camel/societies/workforce/worker.py +96 -28
- camel/societies/workforce/workflow_memory_manager.py +1746 -0
- camel/societies/workforce/workforce.py +5730 -366
- camel/societies/workforce/workforce_callback.py +103 -0
- camel/societies/workforce/workforce_logger.py +647 -0
- camel/societies/workforce/workforce_metrics.py +33 -0
- camel/storages/__init__.py +10 -2
- camel/storages/graph_storages/__init__.py +2 -2
- camel/storages/graph_storages/base.py +2 -2
- camel/storages/graph_storages/graph_element.py +2 -2
- camel/storages/graph_storages/nebula_graph.py +4 -4
- camel/storages/graph_storages/neo4j_graph.py +7 -7
- camel/storages/key_value_storages/__init__.py +2 -2
- camel/storages/key_value_storages/base.py +2 -2
- camel/storages/key_value_storages/in_memory.py +2 -2
- camel/storages/key_value_storages/json.py +17 -4
- camel/storages/key_value_storages/mem0_cloud.py +50 -49
- camel/storages/key_value_storages/redis.py +2 -2
- camel/storages/object_storages/__init__.py +2 -2
- camel/storages/object_storages/amazon_s3.py +2 -2
- camel/storages/object_storages/azure_blob.py +2 -2
- camel/storages/object_storages/base.py +2 -2
- camel/storages/object_storages/google_cloud.py +3 -3
- camel/storages/vectordb_storages/__init__.py +12 -2
- camel/storages/vectordb_storages/base.py +2 -2
- camel/storages/vectordb_storages/chroma.py +731 -0
- camel/storages/vectordb_storages/faiss.py +712 -0
- camel/storages/vectordb_storages/milvus.py +2 -2
- camel/storages/vectordb_storages/oceanbase.py +16 -17
- camel/storages/vectordb_storages/pgvector.py +349 -0
- camel/storages/vectordb_storages/qdrant.py +6 -6
- camel/storages/vectordb_storages/surreal.py +372 -0
- camel/storages/vectordb_storages/tidb.py +11 -8
- camel/storages/vectordb_storages/weaviate.py +714 -0
- camel/tasks/__init__.py +2 -2
- camel/tasks/task.py +366 -27
- camel/tasks/task_prompt.py +3 -3
- camel/terminators/__init__.py +2 -2
- camel/terminators/base.py +2 -2
- camel/terminators/response_terminator.py +2 -2
- camel/terminators/token_limit_terminator.py +2 -2
- camel/toolkits/__init__.py +58 -10
- camel/toolkits/aci_toolkit.py +66 -21
- camel/toolkits/arxiv_toolkit.py +8 -8
- camel/toolkits/ask_news_toolkit.py +2 -2
- camel/toolkits/async_browser_toolkit.py +174 -575
- camel/toolkits/audio_analysis_toolkit.py +3 -3
- camel/toolkits/base.py +65 -7
- camel/toolkits/bohrium_toolkit.py +318 -0
- camel/toolkits/browser_toolkit.py +306 -566
- camel/toolkits/browser_toolkit_commons.py +568 -0
- camel/toolkits/code_execution.py +67 -11
- camel/toolkits/context_summarizer_toolkit.py +684 -0
- camel/toolkits/craw4ai_toolkit.py +93 -0
- camel/toolkits/dappier_toolkit.py +12 -8
- camel/toolkits/data_commons_toolkit.py +2 -2
- camel/toolkits/dingtalk.py +1135 -0
- camel/toolkits/earth_science_toolkit.py +5367 -0
- camel/toolkits/edgeone_pages_mcp_toolkit.py +49 -0
- camel/toolkits/excel_toolkit.py +910 -70
- camel/toolkits/file_toolkit.py +1402 -0
- camel/toolkits/function_tool.py +128 -20
- camel/toolkits/github_toolkit.py +148 -43
- camel/toolkits/gmail_toolkit.py +1839 -0
- camel/toolkits/google_calendar_toolkit.py +40 -6
- camel/toolkits/google_drive_mcp_toolkit.py +54 -0
- camel/toolkits/google_maps_toolkit.py +2 -2
- camel/toolkits/google_scholar_toolkit.py +2 -2
- camel/toolkits/human_toolkit.py +36 -12
- camel/toolkits/hybrid_browser_toolkit/__init__.py +18 -0
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +185 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +246 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +1973 -0
- camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +4589 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package.json +33 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-scripts.js +125 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +1929 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +233 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +589 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/index.ts +7 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +129 -0
- camel/toolkits/hybrid_browser_toolkit/ts/tsconfig.json +27 -0
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +319 -0
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +1037 -0
- camel/toolkits/hybrid_browser_toolkit_py/__init__.py +17 -0
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +575 -0
- camel/toolkits/hybrid_browser_toolkit_py/agent.py +311 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +787 -0
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +490 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +2390 -0
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +233 -0
- camel/toolkits/hybrid_browser_toolkit_py/stealth_script.js +0 -0
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +1043 -0
- camel/toolkits/image_analysis_toolkit.py +3 -3
- camel/toolkits/image_generation_toolkit.py +390 -0
- camel/toolkits/jina_reranker_toolkit.py +195 -79
- camel/toolkits/klavis_toolkit.py +7 -3
- camel/toolkits/linkedin_toolkit.py +2 -2
- camel/toolkits/markitdown_toolkit.py +104 -0
- camel/toolkits/math_toolkit.py +66 -12
- camel/toolkits/mcp_toolkit.py +841 -600
- camel/toolkits/memory_toolkit.py +7 -3
- camel/toolkits/meshy_toolkit.py +2 -2
- camel/toolkits/message_agent_toolkit.py +608 -0
- camel/toolkits/message_integration.py +724 -0
- camel/toolkits/mineru_toolkit.py +2 -2
- camel/toolkits/minimax_mcp_toolkit.py +195 -0
- camel/toolkits/networkx_toolkit.py +2 -2
- camel/toolkits/note_taking_toolkit.py +277 -0
- camel/toolkits/notion_mcp_toolkit.py +224 -0
- camel/toolkits/notion_toolkit.py +2 -2
- camel/toolkits/open_api_specs/biztoc/__init__.py +2 -2
- camel/toolkits/open_api_specs/biztoc/ai-plugin.json +1 -1
- camel/toolkits/open_api_specs/coursera/__init__.py +2 -2
- camel/toolkits/open_api_specs/create_qr_code/__init__.py +2 -2
- camel/toolkits/open_api_specs/klarna/__init__.py +2 -2
- camel/toolkits/open_api_specs/nasa_apod/__init__.py +2 -2
- camel/toolkits/open_api_specs/outschool/__init__.py +2 -2
- camel/toolkits/open_api_specs/outschool/ai-plugin.json +1 -1
- camel/toolkits/open_api_specs/outschool/openapi.yaml +1 -1
- camel/toolkits/open_api_specs/outschool/paths/__init__.py +2 -2
- camel/toolkits/open_api_specs/outschool/paths/get_classes.py +2 -2
- camel/toolkits/open_api_specs/outschool/paths/search_teachers.py +2 -2
- camel/toolkits/open_api_specs/security_config.py +2 -2
- camel/toolkits/open_api_specs/speak/__init__.py +2 -2
- camel/toolkits/open_api_specs/web_scraper/__init__.py +2 -2
- camel/toolkits/open_api_specs/web_scraper/ai-plugin.json +1 -1
- camel/toolkits/open_api_specs/web_scraper/paths/__init__.py +2 -2
- camel/toolkits/open_api_specs/web_scraper/paths/scraper.py +2 -2
- camel/toolkits/open_api_toolkit.py +2 -2
- camel/toolkits/openbb_toolkit.py +7 -3
- camel/toolkits/origene_mcp_toolkit.py +56 -0
- camel/toolkits/page_script.js +86 -74
- camel/toolkits/playwright_mcp_toolkit.py +27 -32
- camel/toolkits/pptx_toolkit.py +790 -0
- camel/toolkits/pubmed_toolkit.py +2 -2
- camel/toolkits/pulse_mcp_search_toolkit.py +2 -2
- camel/toolkits/pyautogui_toolkit.py +2 -2
- camel/toolkits/reddit_toolkit.py +2 -2
- camel/toolkits/resend_toolkit.py +168 -0
- camel/toolkits/retrieval_toolkit.py +2 -2
- camel/toolkits/screenshot_toolkit.py +213 -0
- camel/toolkits/search_toolkit.py +539 -146
- camel/toolkits/searxng_toolkit.py +2 -2
- camel/toolkits/semantic_scholar_toolkit.py +2 -2
- camel/toolkits/slack_toolkit.py +108 -58
- camel/toolkits/sql_toolkit.py +712 -0
- camel/toolkits/stripe_toolkit.py +2 -2
- camel/toolkits/sympy_toolkit.py +3 -3
- camel/toolkits/task_planning_toolkit.py +134 -0
- camel/toolkits/terminal_toolkit/__init__.py +18 -0
- camel/toolkits/terminal_toolkit/terminal_toolkit.py +1070 -0
- camel/toolkits/terminal_toolkit/utils.py +532 -0
- camel/toolkits/thinking_toolkit.py +3 -3
- camel/toolkits/twitter_toolkit.py +8 -3
- camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
- camel/toolkits/video_analysis_toolkit.py +112 -29
- camel/toolkits/video_download_toolkit.py +22 -16
- camel/toolkits/weather_toolkit.py +2 -2
- camel/toolkits/web_deploy_toolkit.py +1219 -0
- camel/toolkits/wechat_official_toolkit.py +483 -0
- camel/toolkits/whatsapp_toolkit.py +2 -2
- camel/toolkits/wolfram_alpha_toolkit.py +53 -25
- camel/toolkits/zapier_toolkit.py +7 -3
- camel/types/__init__.py +4 -4
- camel/types/agents/__init__.py +2 -2
- camel/types/agents/tool_calling_record.py +6 -3
- camel/types/enums.py +454 -35
- camel/types/mcp_registries.py +2 -2
- camel/types/openai_types.py +4 -4
- camel/types/unified_model_type.py +43 -6
- camel/utils/__init__.py +20 -2
- camel/utils/async_func.py +2 -2
- camel/utils/chunker/__init__.py +2 -2
- camel/utils/chunker/base.py +2 -2
- camel/utils/chunker/code_chunker.py +2 -2
- camel/utils/chunker/uio_chunker.py +2 -2
- camel/utils/commons.py +65 -7
- camel/utils/constants.py +5 -2
- camel/utils/context_utils.py +1134 -0
- camel/utils/deduplication.py +2 -2
- camel/utils/filename.py +2 -2
- camel/utils/langfuse.py +258 -0
- camel/utils/mcp.py +140 -6
- camel/utils/mcp_client.py +1056 -0
- camel/utils/message_summarizer.py +148 -0
- camel/utils/response_format.py +2 -2
- camel/utils/token_counting.py +45 -22
- camel/utils/tool_result.py +44 -0
- camel/verifiers/__init__.py +2 -2
- camel/verifiers/base.py +2 -2
- camel/verifiers/math_verifier.py +2 -2
- camel/verifiers/models.py +2 -2
- camel/verifiers/physics_verifier.py +2 -2
- camel/verifiers/python_verifier.py +2 -2
- {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/METADATA +349 -108
- camel_ai-0.2.82.dist-info/RECORD +507 -0
- {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/WHEEL +1 -1
- {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/licenses/LICENSE +1 -1
- camel/loaders/pandas_reader.py +0 -368
- camel/runtime/api.py +0 -97
- camel/toolkits/dalle_toolkit.py +0 -171
- camel/toolkits/file_write_toolkit.py +0 -395
- camel/toolkits/openai_agent_toolkit.py +0 -135
- camel/toolkits/terminal_toolkit.py +0 -1037
- camel_ai-0.2.59.dist-info/RECORD +0 -410
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# ========= Copyright 2023-
|
|
1
|
+
# ========= Copyright 2023-2025 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
2
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
3
|
# you may not use this file except in compliance with the License.
|
|
4
4
|
# You may obtain a copy of the License at
|
|
@@ -10,13 +10,14 @@
|
|
|
10
10
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
11
|
# See the License for the specific language governing permissions and
|
|
12
12
|
# limitations under the License.
|
|
13
|
-
# ========= Copyright 2023-
|
|
13
|
+
# ========= Copyright 2023-2025 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
|
|
15
|
+
# Enables postponed evaluation of annotations (for string-based type hints)
|
|
16
|
+
from __future__ import annotations
|
|
14
17
|
|
|
15
18
|
import datetime
|
|
16
19
|
import io
|
|
17
|
-
import json
|
|
18
20
|
import os
|
|
19
|
-
import random
|
|
20
21
|
import re
|
|
21
22
|
import shutil
|
|
22
23
|
import time
|
|
@@ -25,21 +26,17 @@ from copy import deepcopy
|
|
|
25
26
|
from typing import (
|
|
26
27
|
TYPE_CHECKING,
|
|
27
28
|
Any,
|
|
28
|
-
BinaryIO,
|
|
29
29
|
Dict,
|
|
30
30
|
List,
|
|
31
31
|
Literal,
|
|
32
32
|
Optional,
|
|
33
33
|
Tuple,
|
|
34
|
-
TypedDict,
|
|
35
34
|
Union,
|
|
36
35
|
cast,
|
|
37
36
|
)
|
|
38
37
|
|
|
39
|
-
from PIL import Image
|
|
38
|
+
from PIL import Image
|
|
40
39
|
|
|
41
|
-
if TYPE_CHECKING:
|
|
42
|
-
from camel.agents import ChatAgent
|
|
43
40
|
from camel.logger import get_logger
|
|
44
41
|
from camel.messages import BaseMessage
|
|
45
42
|
from camel.models import BaseModelBackend, ModelFactory
|
|
@@ -53,85 +50,39 @@ from camel.utils import (
|
|
|
53
50
|
sanitize_filename,
|
|
54
51
|
)
|
|
55
52
|
|
|
56
|
-
|
|
53
|
+
# Import shared components from browser_toolkit_commons
|
|
54
|
+
from .browser_toolkit_commons import (
|
|
55
|
+
ACTION_WITH_FEEDBACK_LIST,
|
|
56
|
+
AVAILABLE_ACTIONS_PROMPT,
|
|
57
|
+
GET_FINAL_ANSWER_PROMPT_TEMPLATE,
|
|
58
|
+
OBSERVE_PROMPT_TEMPLATE,
|
|
59
|
+
PLANNING_AGENT_SYSTEM_PROMPT,
|
|
60
|
+
TASK_PLANNING_PROMPT_TEMPLATE,
|
|
61
|
+
TASK_REPLANNING_PROMPT_TEMPLATE,
|
|
62
|
+
WEB_AGENT_SYSTEM_PROMPT,
|
|
63
|
+
InteractiveRegion,
|
|
64
|
+
VisualViewport,
|
|
65
|
+
_add_set_of_mark,
|
|
66
|
+
_parse_json_output,
|
|
67
|
+
_reload_image,
|
|
68
|
+
interactive_region_from_dict,
|
|
69
|
+
visual_viewport_from_dict,
|
|
70
|
+
)
|
|
57
71
|
|
|
58
|
-
|
|
72
|
+
if TYPE_CHECKING:
|
|
73
|
+
from playwright.sync_api import (
|
|
74
|
+
Browser,
|
|
75
|
+
BrowserContext,
|
|
76
|
+
FloatRect,
|
|
77
|
+
Page,
|
|
78
|
+
Playwright,
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
from camel.agents import ChatAgent
|
|
59
82
|
|
|
83
|
+
logger = get_logger(__name__)
|
|
60
84
|
|
|
61
|
-
|
|
62
|
-
1. `fill_input_id(identifier: Union[str, int], text: str)`: Fill an input
|
|
63
|
-
field (e.g. search box) with the given text and press Enter.
|
|
64
|
-
2. `click_id(identifier: Union[str, int])`: Click an element with the given ID.
|
|
65
|
-
3. `hover_id(identifier: Union[str, int])`: Hover over an element with the
|
|
66
|
-
given ID.
|
|
67
|
-
4. `download_file_id(identifier: Union[str, int])`: Download a file with the
|
|
68
|
-
given ID. It returns the path to the downloaded file. If the file is
|
|
69
|
-
successfully downloaded, you can stop the simulation and report the path to
|
|
70
|
-
the downloaded file for further processing.
|
|
71
|
-
5. `scroll_to_bottom()`: Scroll to the bottom of the page.
|
|
72
|
-
6. `scroll_to_top()`: Scroll to the top of the page.
|
|
73
|
-
7. `scroll_up()`: Scroll up the page. It is suitable when you want to see the
|
|
74
|
-
elements above the current viewport.
|
|
75
|
-
8. `scroll_down()`: Scroll down the page. It is suitable when you want to see
|
|
76
|
-
the elements below the current viewport. If the webpage does not change, It
|
|
77
|
-
means that the webpage has scrolled to the bottom.
|
|
78
|
-
9. `back()`: Navigate back to the previous page. This is useful when you want
|
|
79
|
-
to go back to the previous page, as current page is not useful.
|
|
80
|
-
10. `stop()`: Stop the action process, because the task is completed or failed
|
|
81
|
-
(impossible to find the answer). In this situation, you should provide your
|
|
82
|
-
answer in your output.
|
|
83
|
-
11. `get_url()`: Get the current URL of the current page.
|
|
84
|
-
12. `find_text_on_page(search_text: str)`: Find the next given text on the
|
|
85
|
-
current whole page, and scroll the page to the targeted text. It is equivalent
|
|
86
|
-
to pressing Ctrl + F and searching for the text, and is powerful when you want
|
|
87
|
-
to fast-check whether the current page contains some specific text.
|
|
88
|
-
13. `visit_page(url: str)`: Go to the specific url page.
|
|
89
|
-
14. `click_blank_area()`: Click a blank area of the page to unfocus the
|
|
90
|
-
current element. It is useful when you have clicked an element but it cannot
|
|
91
|
-
unfocus itself (e.g. Menu bar) to automatically render the updated webpage.
|
|
92
|
-
15. `ask_question_about_video(question: str)`: Ask a question about the
|
|
93
|
-
current webpage which contains video, e.g. youtube websites.
|
|
94
|
-
"""
|
|
95
|
-
|
|
96
|
-
ACTION_WITH_FEEDBACK_LIST = [
|
|
97
|
-
'ask_question_about_video',
|
|
98
|
-
'download_file_id',
|
|
99
|
-
'find_text_on_page',
|
|
100
|
-
]
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
# Code from magentic-one
|
|
104
|
-
class DOMRectangle(TypedDict):
|
|
105
|
-
x: Union[int, float]
|
|
106
|
-
y: Union[int, float]
|
|
107
|
-
width: Union[int, float]
|
|
108
|
-
height: Union[int, float]
|
|
109
|
-
top: Union[int, float]
|
|
110
|
-
right: Union[int, float]
|
|
111
|
-
bottom: Union[int, float]
|
|
112
|
-
left: Union[int, float]
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
class VisualViewport(TypedDict):
|
|
116
|
-
height: Union[int, float]
|
|
117
|
-
width: Union[int, float]
|
|
118
|
-
offsetLeft: Union[int, float]
|
|
119
|
-
offsetTop: Union[int, float]
|
|
120
|
-
pageLeft: Union[int, float]
|
|
121
|
-
pageTop: Union[int, float]
|
|
122
|
-
scale: Union[int, float]
|
|
123
|
-
clientWidth: Union[int, float]
|
|
124
|
-
clientHeight: Union[int, float]
|
|
125
|
-
scrollWidth: Union[int, float]
|
|
126
|
-
scrollHeight: Union[int, float]
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
class InteractiveRegion(TypedDict):
|
|
130
|
-
tag_name: str
|
|
131
|
-
role: str
|
|
132
|
-
aria_name: str
|
|
133
|
-
v_scrollable: bool
|
|
134
|
-
rects: List[DOMRectangle]
|
|
85
|
+
TOP_NO_LABEL_ZONE = 20
|
|
135
86
|
|
|
136
87
|
|
|
137
88
|
def _get_str(d: Any, k: str) -> str:
|
|
@@ -167,270 +118,6 @@ def _get_bool(d: Any, k: str) -> bool:
|
|
|
167
118
|
)
|
|
168
119
|
|
|
169
120
|
|
|
170
|
-
def _parse_json_output(text: str) -> Dict[str, Any]:
|
|
171
|
-
r"""Extract JSON output from a string."""
|
|
172
|
-
|
|
173
|
-
markdown_pattern = r'```(?:json)?\s*(.*?)\s*```'
|
|
174
|
-
markdown_match = re.search(markdown_pattern, text, re.DOTALL)
|
|
175
|
-
if markdown_match:
|
|
176
|
-
text = markdown_match.group(1).strip()
|
|
177
|
-
|
|
178
|
-
triple_quotes_pattern = r'"""(?:json)?\s*(.*?)\s*"""'
|
|
179
|
-
triple_quotes_match = re.search(triple_quotes_pattern, text, re.DOTALL)
|
|
180
|
-
if triple_quotes_match:
|
|
181
|
-
text = triple_quotes_match.group(1).strip()
|
|
182
|
-
|
|
183
|
-
try:
|
|
184
|
-
return json.loads(text)
|
|
185
|
-
except json.JSONDecodeError:
|
|
186
|
-
try:
|
|
187
|
-
fixed_text = re.sub(
|
|
188
|
-
r'`([^`]*?)`(?=\s*[:,\[\]{}]|$)', r'"\1"', text
|
|
189
|
-
)
|
|
190
|
-
return json.loads(fixed_text)
|
|
191
|
-
except json.JSONDecodeError:
|
|
192
|
-
result = {}
|
|
193
|
-
try:
|
|
194
|
-
bool_pattern = r'"(\w+)"\s*:\s*(true|false)'
|
|
195
|
-
for match in re.finditer(bool_pattern, text, re.IGNORECASE):
|
|
196
|
-
key, value = match.groups()
|
|
197
|
-
result[key] = value.lower() == "true"
|
|
198
|
-
|
|
199
|
-
str_pattern = r'"(\w+)"\s*:\s*"([^"]*)"'
|
|
200
|
-
for match in re.finditer(str_pattern, text):
|
|
201
|
-
key, value = match.groups()
|
|
202
|
-
result[key] = value
|
|
203
|
-
|
|
204
|
-
num_pattern = r'"(\w+)"\s*:\s*(-?\d+(?:\.\d+)?)'
|
|
205
|
-
for match in re.finditer(num_pattern, text):
|
|
206
|
-
key, value = match.groups()
|
|
207
|
-
try:
|
|
208
|
-
result[key] = int(value)
|
|
209
|
-
except ValueError:
|
|
210
|
-
result[key] = float(value)
|
|
211
|
-
|
|
212
|
-
empty_str_pattern = r'"(\w+)"\s*:\s*""'
|
|
213
|
-
for match in re.finditer(empty_str_pattern, text):
|
|
214
|
-
key = match.group(1)
|
|
215
|
-
result[key] = ""
|
|
216
|
-
|
|
217
|
-
if result:
|
|
218
|
-
return result
|
|
219
|
-
|
|
220
|
-
logger.warning(f"Failed to parse JSON output: {text}")
|
|
221
|
-
return {}
|
|
222
|
-
except Exception as e:
|
|
223
|
-
logger.warning(f"Error while extracting fields from JSON: {e}")
|
|
224
|
-
return {}
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
def _reload_image(image: Image.Image) -> Image.Image:
    r"""Round-trip an image through an in-memory PNG and reopen it.

    Args:
        image (Image.Image): The image to serialize.

    Returns:
        Image.Image: A new image opened from the PNG bytes held in an
            in-memory buffer.
    """
    png_buffer = io.BytesIO()
    image.save(png_buffer, format="PNG")
    # Rewind so that ``Image.open`` reads from the start of the PNG data.
    png_buffer.seek(0)
    return Image.open(png_buffer)
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
def dom_rectangle_from_dict(rect: Dict[str, Any]) -> DOMRectangle:
    r"""Create a :class:`DOMRectangle` object from a dictionary.

    Args:
        rect (Dict[str, Any]): Mapping holding the keys ``x``, ``y``,
            ``width``, ``height``, ``top``, ``right``, ``bottom`` and
            ``left``. Each value is read through ``_get_number``.

    Returns:
        DOMRectangle: The typed rectangle built from those eight fields.
    """
    return DOMRectangle(
        x=_get_number(rect, "x"),
        y=_get_number(rect, "y"),
        width=_get_number(rect, "width"),
        height=_get_number(rect, "height"),
        top=_get_number(rect, "top"),
        right=_get_number(rect, "right"),
        bottom=_get_number(rect, "bottom"),
        left=_get_number(rect, "left"),
    )
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
def interactive_region_from_dict(region: Dict[str, Any]) -> InteractiveRegion:
    r"""Create an :class:`InteractiveRegion` object from a dictionary.

    Args:
        region (Dict[str, Any]): Mapping holding the keys ``tag_name``,
            ``role``, ``aria-name``, ``v-scrollable`` and ``rects``. The
            ``rects`` entry must be an iterable of rectangle dictionaries
            accepted by :func:`dom_rectangle_from_dict`.

    Returns:
        InteractiveRegion: The typed region, with every rectangle converted
            to a :class:`DOMRectangle`.

    Raises:
        KeyError: If ``region`` has no ``"rects"`` entry.
    """
    typed_rects: List[DOMRectangle] = [
        dom_rectangle_from_dict(rect) for rect in region["rects"]
    ]

    return InteractiveRegion(
        tag_name=_get_str(region, "tag_name"),
        role=_get_str(region, "role"),
        aria_name=_get_str(region, "aria-name"),
        v_scrollable=_get_bool(region, "v-scrollable"),
        rects=typed_rects,
    )
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
def visual_viewport_from_dict(viewport: Dict[str, Any]) -> VisualViewport:
    r"""Create a :class:`VisualViewport` object from a dictionary.

    Args:
        viewport (Dict[str, Any]): Mapping holding the keys ``height``,
            ``width``, ``offsetLeft``, ``offsetTop``, ``pageLeft``,
            ``pageTop``, ``scale``, ``clientWidth``, ``clientHeight``,
            ``scrollWidth`` and ``scrollHeight``. Each value is read through
            ``_get_number``.

    Returns:
        VisualViewport: The typed viewport built from those fields.
    """
    return VisualViewport(
        height=_get_number(viewport, "height"),
        width=_get_number(viewport, "width"),
        offsetLeft=_get_number(viewport, "offsetLeft"),
        offsetTop=_get_number(viewport, "offsetTop"),
        pageLeft=_get_number(viewport, "pageLeft"),
        pageTop=_get_number(viewport, "pageTop"),
        scale=_get_number(viewport, "scale"),
        clientWidth=_get_number(viewport, "clientWidth"),
        clientHeight=_get_number(viewport, "clientHeight"),
        scrollWidth=_get_number(viewport, "scrollWidth"),
        scrollHeight=_get_number(viewport, "scrollHeight"),
    )
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
def add_set_of_mark(
    screenshot: Union[bytes, Image.Image, io.BufferedIOBase],
    ROIs: Dict[str, InteractiveRegion],
) -> Tuple[Image.Image, List[str], List[str], List[str]]:
    r"""Add a set of marks to a screenshot given in any supported form.

    Args:
        screenshot (Union[bytes, Image.Image, io.BufferedIOBase]): The
            screenshot as an already opened image, raw encoded bytes, or a
            binary stream.
        ROIs (Dict[str, InteractiveRegion]): The regions to add marks to.

    Returns:
        Tuple[Image.Image, List[str], List[str], List[str]]: The result of
            :func:`_add_set_of_mark` for the screenshot: the marked image
            followed by three lists of ROI identifiers.
    """
    # An already opened image is owned by the caller: mark it directly and
    # leave it open.
    if isinstance(screenshot, Image.Image):
        return _add_set_of_mark(screenshot, ROIs)

    if isinstance(screenshot, bytes):
        screenshot = io.BytesIO(screenshot)

    image = Image.open(cast(BinaryIO, screenshot))
    try:
        return _add_set_of_mark(image, ROIs)
    finally:
        # The image was opened here, so release it even when marking fails.
        image.close()
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
def _add_set_of_mark(
    screenshot: Image.Image, ROIs: Dict[str, InteractiveRegion]
) -> Tuple[Image.Image, List[str], List[str], List[str]]:
    r"""Add a set of marks to the screenshot.

    The screenshot is converted to greyscale and each ROI rectangle whose
    centre lies inside the image is drawn on top of it. Rectangles whose
    centre is horizontally outside the image are ignored entirely.

    Args:
        screenshot (Image.Image): The screenshot to add marks to.
        ROIs (Dict[str, InteractiveRegion]): The regions to add marks to.
            Keys must be convertible with ``int``.

    Returns:
        Tuple[Image.Image, List[str], List[str], List[str]]: A tuple
            containing the screenshot with marked ROIs, the ROIs whose
            centre is inside the image, the ROIs whose centre is above the
            image, and the ROIs whose centre is below the image. An ROI
            identifier is appended once per non-empty rectangle, so it may
            occur several times.
    """
    visible_rects: List[str] = []
    rects_above: List[str] = []  # Scroll up to see
    rects_below: List[str] = []  # Scroll down to see

    fnt = ImageFont.load_default(14)
    base = screenshot.convert("L").convert("RGBA")
    image_width, image_height = base.size
    overlay = Image.new("RGBA", base.size)

    try:
        draw = ImageDraw.Draw(overlay)
        for roi_id, region in ROIs.items():
            for rect in region["rects"]:
                # Empty rectangles
                if not rect or rect["width"] == 0 or rect["height"] == 0:
                    continue

                # TODO: add scroll left and right?
                horizontal_center = (rect["right"] + rect["left"]) / 2.0
                vertical_center = (rect["top"] + rect["bottom"]) / 2.0

                if not 0 <= horizontal_center < image_width:
                    continue

                if vertical_center < 0:
                    rects_above.append(roi_id)
                elif vertical_center >= image_height:
                    rects_below.append(roi_id)
                else:  # Centre is inside the image
                    visible_rects.append(roi_id)
                    _draw_roi(draw, int(roi_id), fnt, rect)

        comp = Image.alpha_composite(base, overlay)
    finally:
        # The overlay is only scratch space; release it even on failure.
        overlay.close()

    return comp, visible_rects, rects_above, rects_below
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
def _draw_roi(
    draw: ImageDraw.ImageDraw,
    idx: int,
    font: ImageFont.FreeTypeFont | ImageFont.ImageFont,
    rect: DOMRectangle,
) -> None:
    r"""Draw a ROI on the image.

    A translucent, outlined rectangle is drawn over the ROI and a filled
    label showing ``idx`` is attached to its right edge.

    Args:
        draw (ImageDraw.ImageDraw): The draw object.
        idx (int): The index of the ROI.
        font (ImageFont.FreeTypeFont | ImageFont.ImageFont): The font.
        rect (DOMRectangle): The DOM rectangle.
    """
    color = _get_random_color(idx)
    text_color = _get_text_color(color)
    label = str(idx)

    roi = ((rect["left"], rect["top"]), (rect["right"], rect["bottom"]))

    # Anchor the label at the top-right corner, unless the rectangle starts
    # inside the top no-label zone; then use the bottom-right corner.
    if rect["top"] <= TOP_NO_LABEL_ZONE:
        label_location = (rect["right"], rect["bottom"])
        label_anchor = "rt"
    else:
        label_location = (rect["right"], rect["top"])
        label_anchor = "rb"

    draw.rectangle(
        roi, outline=color, fill=(color[0], color[1], color[2], 48), width=2
    )

    # Measure the label, pad it by 3 pixels per side and paint the backdrop.
    bbox = draw.textbbox(
        label_location,
        label,
        font=font,
        anchor=label_anchor,
        align="center",
    )
    bbox = (bbox[0] - 3, bbox[1] - 3, bbox[2] + 3, bbox[3] + 3)
    draw.rectangle(bbox, fill=color)

    draw.text(
        label_location,
        label,
        fill=text_color,
        font=font,
        anchor=label_anchor,
        align="center",
    )
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
def _get_text_color(
|
|
400
|
-
bg_color: Tuple[int, int, int, int],
|
|
401
|
-
) -> Tuple[int, int, int, int]:
|
|
402
|
-
r"""Determine the ideal text color (black or white) for contrast.
|
|
403
|
-
|
|
404
|
-
Args:
|
|
405
|
-
bg_color: The background color (R, G, B, A).
|
|
406
|
-
|
|
407
|
-
Returns:
|
|
408
|
-
A tuple representing black or white color for text.
|
|
409
|
-
"""
|
|
410
|
-
luminance = bg_color[0] * 0.3 + bg_color[1] * 0.59 + bg_color[2] * 0.11
|
|
411
|
-
return (0, 0, 0, 255) if luminance > 120 else (255, 255, 255, 255)
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
def _get_random_color(identifier: int) -> Tuple[int, int, int, int]:
|
|
415
|
-
r"""Generate a consistent random RGBA color based on the identifier.
|
|
416
|
-
|
|
417
|
-
Args:
|
|
418
|
-
identifier: The ID used as a seed to ensure color consistency.
|
|
419
|
-
|
|
420
|
-
Returns:
|
|
421
|
-
A tuple representing (R, G, B, A) values.
|
|
422
|
-
"""
|
|
423
|
-
rnd = random.Random(int(identifier))
|
|
424
|
-
r = rnd.randint(0, 255)
|
|
425
|
-
g = rnd.randint(125, 255)
|
|
426
|
-
b = rnd.randint(0, 50)
|
|
427
|
-
color = [r, g, b]
|
|
428
|
-
# TODO: check why shuffle is needed?
|
|
429
|
-
rnd.shuffle(color)
|
|
430
|
-
color.append(255)
|
|
431
|
-
return cast(Tuple[int, int, int, int], tuple(color))
|
|
432
|
-
|
|
433
|
-
|
|
434
121
|
class BaseBrowser:
|
|
435
122
|
def __init__(
|
|
436
123
|
self,
|
|
@@ -438,6 +125,7 @@ class BaseBrowser:
|
|
|
438
125
|
cache_dir: Optional[str] = None,
|
|
439
126
|
channel: Literal["chrome", "msedge", "chromium"] = "chromium",
|
|
440
127
|
cookie_json_path: Optional[str] = None,
|
|
128
|
+
user_data_dir: Optional[str] = None,
|
|
441
129
|
):
|
|
442
130
|
r"""Initialize the WebBrowser instance.
|
|
443
131
|
|
|
@@ -449,28 +137,36 @@ class BaseBrowser:
|
|
|
449
137
|
"chromium".
|
|
450
138
|
cookie_json_path (Optional[str]): Path to a JSON file containing
|
|
451
139
|
authentication cookies and browser storage state. If provided
|
|
452
|
-
and the file exists, the browser will load this state to
|
|
453
|
-
authenticated sessions
|
|
140
|
+
and the file exists, the browser will load this state to
|
|
141
|
+
maintain authenticated sessions. This is primarily used when
|
|
142
|
+
`user_data_dir` is not set.
|
|
143
|
+
user_data_dir (Optional[str]): The directory to store user data
|
|
144
|
+
for persistent context. If None, a fresh browser instance
|
|
145
|
+
is used without saving data. (default: :obj:`None`)
|
|
454
146
|
|
|
455
147
|
Returns:
|
|
456
148
|
None
|
|
457
149
|
"""
|
|
458
|
-
|
|
459
|
-
sync_playwright,
|
|
460
|
-
)
|
|
461
|
-
|
|
462
|
-
self.history: list = []
|
|
150
|
+
self.history: List[Any] = []
|
|
463
151
|
self.headless = headless
|
|
464
152
|
self.channel = channel
|
|
465
153
|
self._ensure_browser_installed()
|
|
466
|
-
|
|
467
|
-
self.
|
|
154
|
+
# lazy initialization - playwright is started in init() method
|
|
155
|
+
self.playwright: Optional[Playwright] = None
|
|
156
|
+
self.page_history: List[
|
|
157
|
+
str
|
|
158
|
+
] = [] # stores the history of visited pages
|
|
468
159
|
self.cookie_json_path = cookie_json_path
|
|
160
|
+
self.user_data_dir = user_data_dir
|
|
469
161
|
|
|
470
162
|
# Set the cache directory
|
|
471
163
|
self.cache_dir = "tmp/" if cache_dir is None else cache_dir
|
|
472
164
|
os.makedirs(self.cache_dir, exist_ok=True)
|
|
473
165
|
|
|
166
|
+
# Create user data directory only if specified
|
|
167
|
+
if self.user_data_dir:
|
|
168
|
+
os.makedirs(self.user_data_dir, exist_ok=True)
|
|
169
|
+
|
|
474
170
|
# Load the page script
|
|
475
171
|
abs_dir_path = os.path.dirname(os.path.abspath(__file__))
|
|
476
172
|
page_script_path = os.path.join(abs_dir_path, "page_script.js")
|
|
@@ -483,27 +179,70 @@ class BaseBrowser:
|
|
|
483
179
|
raise FileNotFoundError(
|
|
484
180
|
f"Page script file not found at path: {page_script_path}"
|
|
485
181
|
)
|
|
182
|
+
self.browser: Optional[Browser] = None
|
|
183
|
+
self.context: Optional[BrowserContext] = None
|
|
184
|
+
self.page: Optional[Page] = None
|
|
185
|
+
self.page_url: Optional[str] = None
|
|
186
|
+
self.web_agent_model: Optional[BaseModelBackend] = (
|
|
187
|
+
None # Added for type hinting
|
|
188
|
+
)
|
|
486
189
|
|
|
487
190
|
def init(self) -> None:
    r"""Initialize the browser.

    Starts Playwright if it is not running yet, then opens either a
    persistent context (when ``user_data_dir`` is set) or a fresh browser
    with a new context. In the fresh-browser case the storage state from
    ``cookie_json_path`` is loaded when that file exists.
    """
    # lazy start playwright when init() is called, not in __init__
    if self.playwright is None:
        from playwright.sync_api import sync_playwright

        self.playwright = sync_playwright().start()

    browser_launch_args = [
        "--disable-blink-features=AutomationControlled",  # Basic stealth
    ]

    user_agent_string = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    )

    if self.user_data_dir:
        self.context = self.playwright.chromium.launch_persistent_context(
            user_data_dir=self.user_data_dir,
            headless=self.headless,
            channel=self.channel,
            accept_downloads=True,
            user_agent=user_agent_string,
            java_script_enabled=True,
            args=browser_launch_args,
        )
        self.browser = None  # Not using a separate browser instance
        # Persistent context might reopen pages
        if self.context.pages:
            self.page = self.context.pages[0]
        else:
            self.page = self.context.new_page()
    else:
        # Launch a fresh browser instance
        self.browser = self.playwright.chromium.launch(
            headless=self.headless,
            channel=self.channel,
            args=browser_launch_args,
        )

        new_context_kwargs: Dict[str, Any] = {
            "accept_downloads": True,
            "user_agent": user_agent_string,
            "java_script_enabled": True,
        }
        if self.cookie_json_path and os.path.exists(self.cookie_json_path):
            new_context_kwargs["storage_state"] = self.cookie_json_path

        self.context = self.browser.new_context(**new_context_kwargs)
        self.page = self.context.new_page()

    assert self.context is not None
    assert self.page is not None
|
|
507
246
|
|
|
508
247
|
def clean_cache(self) -> None:
|
|
509
248
|
r"""Delete the cache directory and its contents."""
|
|
@@ -513,7 +252,7 @@ class BaseBrowser:
|
|
|
513
252
|
def _wait_for_load(self, timeout: int = 20) -> None:
|
|
514
253
|
r"""Wait for a certain amount of time for the page to load."""
|
|
515
254
|
timeout_ms = timeout * 1000
|
|
516
|
-
|
|
255
|
+
assert self.page is not None
|
|
517
256
|
self.page.wait_for_load_state("load", timeout=timeout_ms)
|
|
518
257
|
|
|
519
258
|
# TODO: check if this is needed
|
|
@@ -521,13 +260,14 @@ class BaseBrowser:
|
|
|
521
260
|
|
|
522
261
|
def click_blank_area(self) -> None:
    r"""Click a blank area of the page to unfocus the current element.

    The click is issued at the page coordinates (0, 0), after which the
    method waits for the page to finish loading.
    """
    assert self.page is not None
    self.page.mouse.click(0, 0)
    self._wait_for_load()
|
|
526
266
|
|
|
527
267
|
@retry_on_error()
|
|
528
268
|
def visit_page(self, url: str) -> None:
|
|
529
269
|
r"""Visit a page with the given URL."""
|
|
530
|
-
|
|
270
|
+
assert self.page is not None
|
|
531
271
|
self.page.goto(url)
|
|
532
272
|
self._wait_for_load()
|
|
533
273
|
self.page_url = url
|
|
@@ -544,7 +284,8 @@ class BaseBrowser:
|
|
|
544
284
|
"""
|
|
545
285
|
current_url = self.get_url()
|
|
546
286
|
|
|
547
|
-
# Confirm with user before proceeding due to potential slow
|
|
287
|
+
# Confirm with user before proceeding due to potential slow
|
|
288
|
+
# processing time
|
|
548
289
|
confirmation_message = (
|
|
549
290
|
f"Do you want to analyze the video on the current "
|
|
550
291
|
f"page({current_url})? This operation may take a long time.(y/n): "
|
|
@@ -555,7 +296,10 @@ class BaseBrowser:
|
|
|
555
296
|
return "User cancelled the video analysis."
|
|
556
297
|
|
|
557
298
|
model = None
|
|
558
|
-
if
|
|
299
|
+
if (
|
|
300
|
+
hasattr(self, 'web_agent_model')
|
|
301
|
+
and self.web_agent_model is not None
|
|
302
|
+
):
|
|
559
303
|
model = self.web_agent_model
|
|
560
304
|
|
|
561
305
|
video_analyzer = VideoAnalysisToolkit(model=model)
|
|
@@ -577,7 +321,7 @@ class BaseBrowser:
|
|
|
577
321
|
image and the path to the image file if saved, otherwise
|
|
578
322
|
:obj:`None`.
|
|
579
323
|
"""
|
|
580
|
-
|
|
324
|
+
assert self.page is not None
|
|
581
325
|
image_data = self.page.screenshot(timeout=60000)
|
|
582
326
|
image = Image.open(io.BytesIO(image_data))
|
|
583
327
|
|
|
@@ -585,6 +329,7 @@ class BaseBrowser:
|
|
|
585
329
|
if save_image:
|
|
586
330
|
# Get url name to form a file name
|
|
587
331
|
# Use urlparser for a safer extraction the url name
|
|
332
|
+
assert self.page_url is not None
|
|
588
333
|
parsed_url = urllib.parse.urlparse(self.page_url)
|
|
589
334
|
# Max length is set to 241 as there are 10 characters for the
|
|
590
335
|
# timestamp and 4 characters for the file extension:
|
|
@@ -612,17 +357,24 @@ class BaseBrowser:
|
|
|
612
357
|
Returns:
|
|
613
358
|
List[str]: A list of paths to the screenshot files.
|
|
614
359
|
"""
|
|
615
|
-
screenshots = []
|
|
616
|
-
|
|
360
|
+
screenshots: List[str] = [] # Ensure screenshots is typed
|
|
361
|
+
assert self.page is not None
|
|
362
|
+
scroll_height_eval = self.page.evaluate("document.body.scrollHeight")
|
|
363
|
+
scroll_height = cast(
|
|
364
|
+
float, scroll_height_eval
|
|
365
|
+
) # Ensure scroll_height is
|
|
366
|
+
# float
|
|
367
|
+
|
|
617
368
|
assert self.page.viewport_size is not None
|
|
618
369
|
viewport_height = self.page.viewport_size["height"]
|
|
619
|
-
|
|
620
|
-
|
|
370
|
+
current_scroll_eval = self.page.evaluate("window.scrollY")
|
|
371
|
+
current_scroll = cast(float, current_scroll_eval)
|
|
372
|
+
# screenshot_index = 1 # This variable is not used
|
|
621
373
|
|
|
622
374
|
max_height = scroll_height - viewport_height
|
|
623
375
|
scroll_step = int(viewport_height * scroll_ratio)
|
|
624
376
|
|
|
625
|
-
last_height = 0
|
|
377
|
+
last_height = 0.0 # Initialize last_height as float
|
|
626
378
|
|
|
627
379
|
while True:
|
|
628
380
|
logger.debug(
|
|
@@ -631,19 +383,22 @@ class BaseBrowser:
|
|
|
631
383
|
)
|
|
632
384
|
|
|
633
385
|
_, file_path = self.get_screenshot(save_image=True)
|
|
634
|
-
|
|
386
|
+
if file_path is not None: # Ensure file_path is not None before
|
|
387
|
+
# appending
|
|
388
|
+
screenshots.append(file_path)
|
|
635
389
|
|
|
636
390
|
self.page.evaluate(f"window.scrollBy(0, {scroll_step})")
|
|
637
391
|
# Allow time for content to load
|
|
638
392
|
time.sleep(0.5)
|
|
639
393
|
|
|
640
|
-
|
|
394
|
+
current_scroll_eval = self.page.evaluate("window.scrollY")
|
|
395
|
+
current_scroll = cast(float, current_scroll_eval)
|
|
641
396
|
# Break if there is no significant scroll
|
|
642
397
|
if abs(current_scroll - last_height) < viewport_height * 0.1:
|
|
643
398
|
break
|
|
644
399
|
|
|
645
400
|
last_height = current_scroll
|
|
646
|
-
screenshot_index += 1
|
|
401
|
+
# screenshot_index += 1 # This variable is not used
|
|
647
402
|
|
|
648
403
|
return screenshots
|
|
649
404
|
|
|
@@ -653,13 +408,17 @@ class BaseBrowser:
|
|
|
653
408
|
Returns:
|
|
654
409
|
VisualViewport: The visual viewport of the current page.
|
|
655
410
|
"""
|
|
411
|
+
assert self.page is not None
|
|
656
412
|
try:
|
|
657
413
|
self.page.evaluate(self.page_script)
|
|
658
414
|
except Exception as e:
|
|
659
415
|
logger.warning(f"Error evaluating page script: {e}")
|
|
660
416
|
|
|
417
|
+
visual_viewport_eval = self.page.evaluate(
|
|
418
|
+
"MultimodalWebSurfer.getVisualViewport();"
|
|
419
|
+
)
|
|
661
420
|
return visual_viewport_from_dict(
|
|
662
|
-
|
|
421
|
+
cast(Dict[str, Any], visual_viewport_eval)
|
|
663
422
|
)
|
|
664
423
|
|
|
665
424
|
def get_interactive_elements(self) -> Dict[str, InteractiveRegion]:
|
|
@@ -668,6 +427,7 @@ class BaseBrowser:
|
|
|
668
427
|
Returns:
|
|
669
428
|
Dict[str, InteractiveRegion]: A dictionary of interactive elements.
|
|
670
429
|
"""
|
|
430
|
+
assert self.page is not None
|
|
671
431
|
try:
|
|
672
432
|
self.page.evaluate(self.page_script)
|
|
673
433
|
except Exception as e:
|
|
@@ -682,7 +442,7 @@ class BaseBrowser:
|
|
|
682
442
|
for k in result:
|
|
683
443
|
typed_results[k] = interactive_region_from_dict(result[k])
|
|
684
444
|
|
|
685
|
-
return typed_results
|
|
445
|
+
return typed_results
|
|
686
446
|
|
|
687
447
|
def get_som_screenshot(
|
|
688
448
|
self,
|
|
@@ -696,7 +456,8 @@ class BaseBrowser:
|
|
|
696
456
|
directory.
|
|
697
457
|
|
|
698
458
|
Returns:
|
|
699
|
-
Tuple[Image.Image, Union[str, None]]: A tuple containing the
|
|
459
|
+
Tuple[Image.Image, Union[str, None]]: A tuple containing the
|
|
460
|
+
screenshot image
|
|
700
461
|
and an optional path to the image file if saved, otherwise
|
|
701
462
|
:obj:`None`.
|
|
702
463
|
"""
|
|
@@ -706,11 +467,12 @@ class BaseBrowser:
|
|
|
706
467
|
rects = self.get_interactive_elements()
|
|
707
468
|
|
|
708
469
|
file_path: str | None = None
|
|
709
|
-
comp, _, _, _ =
|
|
470
|
+
comp, _, _, _ = _add_set_of_mark(
|
|
710
471
|
screenshot,
|
|
711
|
-
rects,
|
|
472
|
+
rects,
|
|
712
473
|
)
|
|
713
474
|
if save_image:
|
|
475
|
+
assert self.page_url is not None
|
|
714
476
|
parsed_url = urllib.parse.urlparse(self.page_url)
|
|
715
477
|
# Max length is set to 241 as there are 10 characters for the
|
|
716
478
|
# timestamp and 4 characters for the file extension:
|
|
@@ -727,25 +489,30 @@ class BaseBrowser:
|
|
|
727
489
|
|
|
728
490
|
def scroll_up(self) -> None:
    r"""Scroll up the page by pressing the ``PageUp`` key."""
    assert self.page is not None
    self.page.keyboard.press("PageUp")
|
|
731
494
|
|
|
732
495
|
def scroll_down(self) -> None:
    r"""Scroll down the page by pressing the ``PageDown`` key."""
    assert self.page is not None
    self.page.keyboard.press("PageDown")
|
|
735
499
|
|
|
736
500
|
def get_url(self) -> str:
    r"""Get the URL of the current page.

    Returns:
        str: The ``url`` attribute of the active page.
    """
    assert self.page is not None
    return self.page.url
|
|
739
504
|
|
|
740
505
|
def click_id(self, identifier: Union[str, int]) -> None:
|
|
741
506
|
r"""Click an element with the given identifier."""
|
|
507
|
+
assert self.page is not None
|
|
742
508
|
if isinstance(identifier, int):
|
|
743
509
|
identifier = str(identifier)
|
|
744
510
|
target = self.page.locator(f"[__elementId='{identifier}']")
|
|
745
511
|
|
|
746
512
|
try:
|
|
747
513
|
target.wait_for(timeout=5000)
|
|
748
|
-
except
|
|
514
|
+
except Exception as e: # Consider using playwright specific
|
|
515
|
+
# TimeoutError
|
|
749
516
|
logger.debug(f"Error during click operation: {e}")
|
|
750
517
|
raise ValueError("No such element.") from None
|
|
751
518
|
|
|
@@ -754,7 +521,13 @@ class BaseBrowser:
|
|
|
754
521
|
new_page = None
|
|
755
522
|
try:
|
|
756
523
|
with self.page.expect_event("popup", timeout=1000) as page_info:
|
|
757
|
-
box
|
|
524
|
+
box: Optional[FloatRect] = target.bounding_box()
|
|
525
|
+
if box is None:
|
|
526
|
+
logger.warning(
|
|
527
|
+
f"Bounding box not found for element '{identifier}'. "
|
|
528
|
+
f"Cannot click."
|
|
529
|
+
)
|
|
530
|
+
return
|
|
758
531
|
self.page.mouse.click(
|
|
759
532
|
box["x"] + box["width"] / 2, box["y"] + box["height"] / 2
|
|
760
533
|
)
|
|
@@ -765,7 +538,8 @@ class BaseBrowser:
|
|
|
765
538
|
self.page_history.append(deepcopy(self.page.url))
|
|
766
539
|
self.page = new_page
|
|
767
540
|
|
|
768
|
-
except
|
|
541
|
+
except Exception as e: # Consider using playwright specific
|
|
542
|
+
# TimeoutError
|
|
769
543
|
logger.debug(f"Error during click operation: {e}")
|
|
770
544
|
pass
|
|
771
545
|
|
|
@@ -773,6 +547,7 @@ class BaseBrowser:
|
|
|
773
547
|
|
|
774
548
|
def extract_url_content(self) -> str:
    r"""Extract the content of the current page.

    Returns:
        str: Whatever ``page.content()`` returns for the active page.
    """
    assert self.page is not None
    return self.page.content()
|
|
778
553
|
|
|
@@ -781,17 +556,17 @@ class BaseBrowser:
|
|
|
781
556
|
|
|
782
557
|
Args:
|
|
783
558
|
identifier (str): The identifier of the file to download.
|
|
784
|
-
file_path (str): The path to save the downloaded file.
|
|
785
559
|
|
|
786
560
|
Returns:
|
|
787
561
|
str: The result of the action.
|
|
788
562
|
"""
|
|
789
|
-
|
|
563
|
+
assert self.page is not None
|
|
790
564
|
if isinstance(identifier, int):
|
|
791
565
|
identifier = str(identifier)
|
|
792
566
|
try:
|
|
793
567
|
target = self.page.locator(f"[__elementId='{identifier}']")
|
|
794
|
-
except
|
|
568
|
+
except Exception as e: # Consider using playwright specific
|
|
569
|
+
# TimeoutError
|
|
795
570
|
logger.debug(f"Error during download operation: {e}")
|
|
796
571
|
logger.warning(
|
|
797
572
|
f"Element with identifier '{identifier}' not found."
|
|
@@ -800,7 +575,7 @@ class BaseBrowser:
|
|
|
800
575
|
|
|
801
576
|
target.scroll_into_view_if_needed()
|
|
802
577
|
|
|
803
|
-
|
|
578
|
+
file_path_val = os.path.join(self.cache_dir)
|
|
804
579
|
self._wait_for_load()
|
|
805
580
|
|
|
806
581
|
try:
|
|
@@ -809,12 +584,13 @@ class BaseBrowser:
|
|
|
809
584
|
download = download_info.value
|
|
810
585
|
file_name = download.suggested_filename
|
|
811
586
|
|
|
812
|
-
|
|
813
|
-
download.save_as(
|
|
587
|
+
file_path_val = os.path.join(file_path_val, file_name)
|
|
588
|
+
download.save_as(file_path_val)
|
|
814
589
|
|
|
815
|
-
return f"Downloaded file to path '{
|
|
590
|
+
return f"Downloaded file to path '{file_path_val}'."
|
|
816
591
|
|
|
817
|
-
except
|
|
592
|
+
except Exception as e: # Consider using playwright specific
|
|
593
|
+
# TimeoutError
|
|
818
594
|
logger.debug(f"Error during download operation: {e}")
|
|
819
595
|
return f"Failed to download file with identifier '{identifier}'."
|
|
820
596
|
|
|
@@ -828,12 +604,14 @@ class BaseBrowser:
|
|
|
828
604
|
Returns:
|
|
829
605
|
str: The result of the action.
|
|
830
606
|
"""
|
|
607
|
+
assert self.page is not None
|
|
831
608
|
if isinstance(identifier, int):
|
|
832
609
|
identifier = str(identifier)
|
|
833
610
|
|
|
834
611
|
try:
|
|
835
612
|
target = self.page.locator(f"[__elementId='{identifier}']")
|
|
836
|
-
except
|
|
613
|
+
except Exception as e: # Consider using playwright specific
|
|
614
|
+
# TimeoutError
|
|
837
615
|
logger.debug(f"Error during fill operation: {e}")
|
|
838
616
|
logger.warning(
|
|
839
617
|
f"Element with identifier '{identifier}' not found."
|
|
@@ -844,7 +622,8 @@ class BaseBrowser:
|
|
|
844
622
|
target.focus()
|
|
845
623
|
try:
|
|
846
624
|
target.fill(text)
|
|
847
|
-
except
|
|
625
|
+
except Exception as e: # Consider using playwright specific
|
|
626
|
+
# TimeoutError
|
|
848
627
|
logger.debug(f"Error during fill operation: {e}")
|
|
849
628
|
target.press_sequentially(text)
|
|
850
629
|
|
|
@@ -856,11 +635,13 @@ class BaseBrowser:
|
|
|
856
635
|
)
|
|
857
636
|
|
|
858
637
|
def scroll_to_bottom(self) -> str:
    r"""Scroll to the bottom of the page and wait for it to load.

    Returns:
        str: A fixed message confirming the scroll.
    """
    assert self.page is not None
    self.page.evaluate("window.scrollTo(0, document.body.scrollHeight);")
    self._wait_for_load()
    return "Scrolled to the bottom of the page."
|
|
862
642
|
|
|
863
643
|
def scroll_to_top(self) -> str:
    r"""Scroll to the top of the page and wait for it to load.

    Returns:
        str: A fixed message confirming the scroll.
    """
    assert self.page is not None
    self.page.evaluate("window.scrollTo(0, 0);")
    self._wait_for_load()
    return "Scrolled to the top of the page."
|
|
@@ -874,11 +655,13 @@ class BaseBrowser:
|
|
|
874
655
|
Returns:
|
|
875
656
|
str: The result of the action.
|
|
876
657
|
"""
|
|
658
|
+
assert self.page is not None
|
|
877
659
|
if isinstance(identifier, int):
|
|
878
660
|
identifier = str(identifier)
|
|
879
661
|
try:
|
|
880
662
|
target = self.page.locator(f"[__elementId='{identifier}']")
|
|
881
|
-
except
|
|
663
|
+
except Exception as e: # Consider using playwright specific
|
|
664
|
+
# TimeoutError
|
|
882
665
|
logger.debug(f"Error during hover operation: {e}")
|
|
883
666
|
logger.warning(
|
|
884
667
|
f"Element with identifier '{identifier}' not found."
|
|
@@ -895,16 +678,18 @@ class BaseBrowser:
|
|
|
895
678
|
targeted text. It is equivalent to pressing Ctrl + F and searching for
|
|
896
679
|
the text.
|
|
897
680
|
"""
|
|
898
|
-
|
|
681
|
+
assert self.page is not None
|
|
899
682
|
script = f"""
|
|
900
683
|
(function() {{
|
|
901
684
|
let text = "{search_text}";
|
|
902
685
|
let found = window.find(text);
|
|
903
686
|
if (!found) {{
|
|
904
|
-
let elements = document.querySelectorAll("*:not(script):not(
|
|
687
|
+
let elements = document.querySelectorAll("*:not(script):not(
|
|
688
|
+
style)");
|
|
905
689
|
for (let el of elements) {{
|
|
906
690
|
if (el.innerText && el.innerText.includes(text)) {{
|
|
907
|
-
el.scrollIntoView({{behavior: "smooth", block:
|
|
691
|
+
el.scrollIntoView({{behavior: "smooth", block:
|
|
692
|
+
"center"}});
|
|
908
693
|
el.style.backgroundColor = "yellow";
|
|
909
694
|
el.style.border = '2px solid red';
|
|
910
695
|
return true;
|
|
@@ -915,7 +700,8 @@ class BaseBrowser:
|
|
|
915
700
|
return true;
|
|
916
701
|
}})();
|
|
917
702
|
"""
|
|
918
|
-
|
|
703
|
+
found_eval = self.page.evaluate(script)
|
|
704
|
+
found = cast(bool, found_eval) # Ensure found is bool
|
|
919
705
|
self._wait_for_load()
|
|
920
706
|
if found:
|
|
921
707
|
return f"Found text '{search_text}' on the page."
|
|
@@ -924,7 +710,7 @@ class BaseBrowser:
|
|
|
924
710
|
|
|
925
711
|
def back(self):
|
|
926
712
|
r"""Navigate back to the previous page."""
|
|
927
|
-
|
|
713
|
+
assert self.page is not None
|
|
928
714
|
page_url_before = self.page.url
|
|
929
715
|
self.page.go_back()
|
|
930
716
|
|
|
@@ -942,15 +728,24 @@ class BaseBrowser:
|
|
|
942
728
|
self._wait_for_load()
|
|
943
729
|
|
|
944
730
|
def close(self):
    r"""Close the context, the browser and Playwright, then reset handles.

    The context is closed first, then the browser (only set when it was
    launched separately from the context), then the Playwright instance.
    Every step runs even if an earlier one raises; the error is re-raised
    once cleanup is done. Afterwards ``page``, ``context``, ``browser`` and
    ``playwright`` are set to ``None`` so that a later ``init()`` starts a
    new Playwright instance instead of reusing a stopped one, and so that
    calling ``close()`` again is a no-op.
    """
    from contextlib import ExitStack

    def _reset_handles() -> None:
        self.page = None
        self.context = None
        self.browser = None
        self.playwright = None

    # ExitStack runs callbacks in reverse registration order and keeps
    # going when one of them raises, so register the last step first.
    with ExitStack() as teardown:
        teardown.callback(_reset_handles)
        if self.playwright:
            teardown.callback(self.playwright.stop)
        if self.browser is not None:
            teardown.callback(self.browser.close)
        if self.context is not None:
            teardown.callback(self.context.close)
|
|
946
739
|
|
|
947
|
-
# ruff: noqa: E501
|
|
948
740
|
def show_interactive_elements(self):
|
|
949
741
|
r"""Show simple interactive elements on the current page."""
|
|
742
|
+
assert self.page is not None
|
|
950
743
|
self.page.evaluate(self.page_script)
|
|
951
744
|
self.page.evaluate("""
|
|
952
745
|
() => {
|
|
953
|
-
document.querySelectorAll('a, button, input, select, textarea,
|
|
746
|
+
document.querySelectorAll('a, button, input, select, textarea,
|
|
747
|
+
[tabindex]:not([tabindex="-1"]),
|
|
748
|
+
[contenteditable="true"]').forEach(el => {
|
|
954
749
|
el.style.border = '2px solid red';
|
|
955
750
|
});
|
|
956
751
|
}
|
|
@@ -960,6 +755,7 @@ class BaseBrowser:
|
|
|
960
755
|
def get_webpage_content(self) -> str:
|
|
961
756
|
from html2text import html2text
|
|
962
757
|
|
|
758
|
+
assert self.page is not None
|
|
963
759
|
self._wait_for_load()
|
|
964
760
|
html_content = self.page.content()
|
|
965
761
|
|
|
@@ -1026,11 +822,15 @@ class BrowserToolkit(BaseToolkit):
|
|
|
1026
822
|
planning_agent_model: Optional[BaseModelBackend] = None,
|
|
1027
823
|
output_language: str = "en",
|
|
1028
824
|
cookie_json_path: Optional[str] = None,
|
|
825
|
+
user_data_dir: Optional[str] = None,
|
|
1029
826
|
):
|
|
1030
827
|
r"""Initialize the BrowserToolkit instance.
|
|
1031
828
|
|
|
1032
829
|
Args:
|
|
1033
830
|
headless (bool): Whether to run the browser in headless mode.
|
|
831
|
+
When running inside a CAMEL runtime container, this is
|
|
832
|
+
automatically set to True since containers typically don't
|
|
833
|
+
have a display.
|
|
1034
834
|
cache_dir (Union[str, None]): The directory to store cache files.
|
|
1035
835
|
channel (Literal["chrome", "msedge", "chromium"]): The browser
|
|
1036
836
|
channel to use. Must be one of "chrome", "msedge", or
|
|
@@ -1045,25 +845,47 @@ class BrowserToolkit(BaseToolkit):
|
|
|
1045
845
|
(default: :obj:`"en`")
|
|
1046
846
|
cookie_json_path (Optional[str]): Path to a JSON file containing
|
|
1047
847
|
authentication cookies and browser storage state. If provided
|
|
1048
|
-
and the file exists, the browser will load this state to
|
|
848
|
+
and the file exists, the browser will load this state to
|
|
849
|
+
maintain
|
|
1049
850
|
authenticated sessions without requiring manual login.
|
|
1050
851
|
(default: :obj:`None`)
|
|
852
|
+
user_data_dir (Optional[str]): The directory to store user data
|
|
853
|
+
for persistent context. If None, a fresh browser instance
|
|
854
|
+
is used without saving data. (default: :obj:`None`)
|
|
1051
855
|
"""
|
|
856
|
+
super().__init__() # Call to super().__init__() added
|
|
857
|
+
|
|
858
|
+
# auto-detect if running inside a CAMEL runtime container
|
|
859
|
+
# force headless mode since containers typically don't have a display
|
|
860
|
+
in_runtime = os.environ.get("CAMEL_RUNTIME", "").lower() == "true"
|
|
861
|
+
if in_runtime and not headless:
|
|
862
|
+
logger.info(
|
|
863
|
+
"Detected CAMEL_RUNTIME environment - enabling headless mode "
|
|
864
|
+
"since containers typically don't have a display"
|
|
865
|
+
)
|
|
866
|
+
headless = True
|
|
1052
867
|
|
|
1053
868
|
self.browser = BaseBrowser(
|
|
1054
869
|
headless=headless,
|
|
1055
870
|
cache_dir=cache_dir,
|
|
1056
871
|
channel=channel,
|
|
1057
872
|
cookie_json_path=cookie_json_path,
|
|
873
|
+
user_data_dir=user_data_dir,
|
|
1058
874
|
)
|
|
875
|
+
self.browser.web_agent_model = web_agent_model # Pass model to
|
|
876
|
+
# BaseBrowser instance
|
|
1059
877
|
|
|
1060
878
|
self.history_window = history_window
|
|
1061
879
|
self.web_agent_model = web_agent_model
|
|
1062
880
|
self.planning_agent_model = planning_agent_model
|
|
1063
881
|
self.output_language = output_language
|
|
1064
882
|
|
|
1065
|
-
self.history:
|
|
1066
|
-
self.web_agent
|
|
883
|
+
self.history: List[Dict[str, Any]] = [] # Typed history list
|
|
884
|
+
self.web_agent: ChatAgent
|
|
885
|
+
self.planning_agent: ChatAgent
|
|
886
|
+
self.web_agent, self.planning_agent = self._initialize_agent(
|
|
887
|
+
web_agent_model, planning_agent_model
|
|
888
|
+
)
|
|
1067
889
|
|
|
1068
890
|
def _reset(self):
|
|
1069
891
|
self.web_agent.reset()
|
|
@@ -1071,43 +893,40 @@ class BrowserToolkit(BaseToolkit):
|
|
|
1071
893
|
self.history = []
|
|
1072
894
|
os.makedirs(self.browser.cache_dir, exist_ok=True)
|
|
1073
895
|
|
|
1074
|
-
def _initialize_agent(
|
|
896
|
+
def _initialize_agent(
|
|
897
|
+
self,
|
|
898
|
+
web_agent_model_backend: Optional[BaseModelBackend],
|
|
899
|
+
planning_agent_model_backend: Optional[BaseModelBackend],
|
|
900
|
+
) -> Tuple[ChatAgent, ChatAgent]:
|
|
1075
901
|
r"""Initialize the agent."""
|
|
1076
902
|
from camel.agents import ChatAgent
|
|
1077
903
|
|
|
1078
|
-
if
|
|
1079
|
-
|
|
1080
|
-
model_platform=ModelPlatformType.
|
|
1081
|
-
model_type=ModelType.
|
|
904
|
+
if web_agent_model_backend is None:
|
|
905
|
+
web_agent_model_instance = ModelFactory.create(
|
|
906
|
+
model_platform=ModelPlatformType.DEFAULT,
|
|
907
|
+
model_type=ModelType.DEFAULT,
|
|
1082
908
|
model_config_dict={"temperature": 0, "top_p": 1},
|
|
1083
909
|
)
|
|
1084
910
|
else:
|
|
1085
|
-
|
|
911
|
+
web_agent_model_instance = web_agent_model_backend
|
|
1086
912
|
|
|
1087
|
-
if
|
|
913
|
+
if planning_agent_model_backend is None:
|
|
1088
914
|
planning_model = ModelFactory.create(
|
|
1089
|
-
model_platform=ModelPlatformType.
|
|
1090
|
-
model_type=ModelType.
|
|
915
|
+
model_platform=ModelPlatformType.DEFAULT,
|
|
916
|
+
model_type=ModelType.DEFAULT,
|
|
1091
917
|
)
|
|
1092
918
|
else:
|
|
1093
|
-
planning_model =
|
|
919
|
+
planning_model = planning_agent_model_backend
|
|
1094
920
|
|
|
1095
|
-
system_prompt =
|
|
1096
|
-
You are a helpful web agent that can assist users in browsing the web.
|
|
1097
|
-
Given a high-level task, you can leverage predefined browser tools to help
|
|
1098
|
-
users achieve their goals.
|
|
1099
|
-
"""
|
|
921
|
+
system_prompt = WEB_AGENT_SYSTEM_PROMPT
|
|
1100
922
|
|
|
1101
923
|
web_agent = ChatAgent(
|
|
1102
924
|
system_message=system_prompt,
|
|
1103
|
-
model=
|
|
925
|
+
model=web_agent_model_instance,
|
|
1104
926
|
output_language=self.output_language,
|
|
1105
927
|
)
|
|
1106
928
|
|
|
1107
|
-
planning_system_prompt =
|
|
1108
|
-
You are a helpful planning agent that can assist users in planning complex
|
|
1109
|
-
tasks which need multi-step browser interaction.
|
|
1110
|
-
"""
|
|
929
|
+
planning_system_prompt = PLANNING_AGENT_SYSTEM_PROMPT
|
|
1111
930
|
|
|
1112
931
|
planning_agent = ChatAgent(
|
|
1113
932
|
system_message=planning_system_prompt,
|
|
@@ -1120,96 +939,24 @@ tasks which need multi-step browser interaction.
|
|
|
1120
939
|
def _observe(
|
|
1121
940
|
self, task_prompt: str, detailed_plan: Optional[str] = None
|
|
1122
941
|
) -> Tuple[str, str, str]:
|
|
1123
|
-
r"""Let agent observe the current environment, and get the next
|
|
942
|
+
r"""Let agent observe the current environment, and get the next
|
|
943
|
+
action."""
|
|
1124
944
|
|
|
1125
|
-
|
|
945
|
+
detailed_plan_prompt_str = ""
|
|
1126
946
|
|
|
1127
947
|
if detailed_plan is not None:
|
|
1128
|
-
|
|
948
|
+
detailed_plan_prompt_str = f"""
|
|
1129
949
|
Here is a plan about how to solve the task step-by-step which you must follow:
|
|
1130
950
|
<detailed_plan>{detailed_plan}<detailed_plan>
|
|
1131
951
|
"""
|
|
1132
952
|
|
|
1133
|
-
observe_prompt =
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
the browser, and provide the next appropriate action to take.
|
|
1141
|
-
|
|
1142
|
-
{detailed_plan_prompt}
|
|
1143
|
-
|
|
1144
|
-
Here are the current available browser functions you can use:
|
|
1145
|
-
{AVAILABLE_ACTIONS_PROMPT}
|
|
1146
|
-
|
|
1147
|
-
Here are the latest {self.history_window} trajectory (at most) you have taken:
|
|
1148
|
-
<history>
|
|
1149
|
-
{self.history[-self.history_window :]}
|
|
1150
|
-
</history>
|
|
1151
|
-
|
|
1152
|
-
Your output should be in json format, including the following fields:
|
|
1153
|
-
- `observation`: The detailed image description about the current viewport. Do
|
|
1154
|
-
not over-confident about the correctness of the history actions. You should
|
|
1155
|
-
always check the current viewport to make sure the correctness of the next
|
|
1156
|
-
action.
|
|
1157
|
-
- `reasoning`: The reasoning about the next action you want to take, and the
|
|
1158
|
-
possible obstacles you may encounter, and how to solve them. Do not forget to
|
|
1159
|
-
check the history actions to avoid the same mistakes.
|
|
1160
|
-
- `action_code`: The action code you want to take. It is only one step action
|
|
1161
|
-
code, without any other texts (such as annotation)
|
|
1162
|
-
|
|
1163
|
-
Here is two example of the output:
|
|
1164
|
-
```json
|
|
1165
|
-
{{
|
|
1166
|
-
"observation": [IMAGE_DESCRIPTION],
|
|
1167
|
-
"reasoning": [YOUR_REASONING],
|
|
1168
|
-
"action_code": "fill_input_id([ID], [TEXT])"
|
|
1169
|
-
}}
|
|
1170
|
-
|
|
1171
|
-
{{
|
|
1172
|
-
"observation": "The current page is a CAPTCHA verification page on Amazon. It asks the user to ..",
|
|
1173
|
-
"reasoning": "To proceed with the task of searching for products, I need to complete..",
|
|
1174
|
-
"action_code": "fill_input_id(3, 'AUXPMR')"
|
|
1175
|
-
}}
|
|
1176
|
-
|
|
1177
|
-
Here are some tips for you:
|
|
1178
|
-
- Never forget the overall question: **{task_prompt}**
|
|
1179
|
-
- Maybe after a certain operation (e.g. click_id), the page content has not
|
|
1180
|
-
changed. You can check whether the action step is successful by looking at the
|
|
1181
|
-
`success` of the action step in the history. If successful, it means that the
|
|
1182
|
-
page content is indeed the same after the click. You need to try other methods.
|
|
1183
|
-
- If using one way to solve the problem is not successful, try other ways.
|
|
1184
|
-
Make sure your provided ID is correct!
|
|
1185
|
-
- Some cases are very complex and need to be achieve by an iterative process.
|
|
1186
|
-
You can use the `back()` function to go back to the previous page to try other
|
|
1187
|
-
methods.
|
|
1188
|
-
- There are many links on the page, which may be useful for solving the
|
|
1189
|
-
problem. You can use the `click_id()` function to click on the link to see if
|
|
1190
|
-
it is useful.
|
|
1191
|
-
- Always keep in mind that your action must be based on the ID shown in the
|
|
1192
|
-
current image or viewport, not the ID shown in the history.
|
|
1193
|
-
- Do not use `stop()` lightly. Always remind yourself that the image only
|
|
1194
|
-
shows a part of the full page. If you cannot find the answer, try to use
|
|
1195
|
-
functions like `scroll_up()` and `scroll_down()` to check the full content of
|
|
1196
|
-
the webpage before doing anything else, because the answer or next key step
|
|
1197
|
-
may be hidden in the content below.
|
|
1198
|
-
- If the webpage needs human verification, you must avoid processing it.
|
|
1199
|
-
Please use `back()` to go back to the previous page, and try other ways.
|
|
1200
|
-
- If you have tried everything and still cannot resolve the issue, please stop
|
|
1201
|
-
the simulation, and report issues you have encountered.
|
|
1202
|
-
- Check the history actions carefully, detect whether you have repeatedly made
|
|
1203
|
-
the same actions or not.
|
|
1204
|
-
- When dealing with wikipedia revision history related tasks, you need to
|
|
1205
|
-
think about the solution flexibly. First, adjust the browsing history
|
|
1206
|
-
displayed on a single page to the maximum, and then make use of the
|
|
1207
|
-
find_text_on_page function. This is extremely useful which can quickly locate
|
|
1208
|
-
the text you want to find and skip massive amount of useless information.
|
|
1209
|
-
- Flexibly use interactive elements like slide down selection bar to filter
|
|
1210
|
-
out the information you need. Sometimes they are extremely useful.
|
|
1211
|
-
```
|
|
1212
|
-
"""
|
|
953
|
+
observe_prompt = OBSERVE_PROMPT_TEMPLATE.format(
|
|
954
|
+
task_prompt=task_prompt,
|
|
955
|
+
detailed_plan_prompt=detailed_plan_prompt_str,
|
|
956
|
+
AVAILABLE_ACTIONS_PROMPT=AVAILABLE_ACTIONS_PROMPT,
|
|
957
|
+
history_window=self.history_window,
|
|
958
|
+
history=self.history[-self.history_window :],
|
|
959
|
+
)
|
|
1213
960
|
|
|
1214
961
|
# get current state
|
|
1215
962
|
som_screenshot, _ = self.browser.get_som_screenshot(save_image=True)
|
|
@@ -1223,7 +970,8 @@ out the information you need. Sometimes they are extremely useful.
|
|
|
1223
970
|
|
|
1224
971
|
resp_content = resp.msgs[0].content
|
|
1225
972
|
|
|
1226
|
-
resp_dict = _parse_json_output(resp_content)
|
|
973
|
+
resp_dict = _parse_json_output(resp_content, logger) # Pass logger to
|
|
974
|
+
# _parse_json_output
|
|
1227
975
|
observation_result: str = resp_dict.get("observation", "")
|
|
1228
976
|
reasoning_result: str = resp_dict.get("reasoning", "")
|
|
1229
977
|
action_code: str = resp_dict.get("action_code", "")
|
|
@@ -1244,7 +992,10 @@ out the information you need. Sometimes they are extremely useful.
|
|
|
1244
992
|
id_part = (
|
|
1245
993
|
parts[0].replace("fill_input_id(", "").strip()
|
|
1246
994
|
)
|
|
1247
|
-
action_code =
|
|
995
|
+
action_code = (
|
|
996
|
+
f"fill_input_id({id_part}, 'Please "
|
|
997
|
+
f"fill the text here.')"
|
|
998
|
+
)
|
|
1248
999
|
|
|
1249
1000
|
action_code = action_code.replace("`", "").strip()
|
|
1250
1001
|
|
|
@@ -1346,43 +1097,36 @@ out the information you need. Sometimes they are extremely useful.
|
|
|
1346
1097
|
)
|
|
1347
1098
|
|
|
1348
1099
|
def _get_final_answer(self, task_prompt: str) -> str:
|
|
1349
|
-
r"""Get the final answer based on the task prompt and current
|
|
1350
|
-
|
|
1100
|
+
r"""Get the final answer based on the task prompt and current
|
|
1101
|
+
browser state.
|
|
1102
|
+
It is used when the agent thinks that the task can be completed
|
|
1103
|
+
without any further action, and answer can be directly found in the
|
|
1104
|
+
current viewport.
|
|
1351
1105
|
"""
|
|
1352
1106
|
|
|
1353
|
-
prompt =
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
<history>{self.history}</history>
|
|
1357
|
-
Please find the final answer, or give valuable insights and founds (e.g. if previous actions contain downloading files, your output should include the path of the downloaded file) about the overall task: <task>{task_prompt}</task>
|
|
1358
|
-
"""
|
|
1107
|
+
prompt = GET_FINAL_ANSWER_PROMPT_TEMPLATE.format(
|
|
1108
|
+
history=self.history, task_prompt=task_prompt
|
|
1109
|
+
)
|
|
1359
1110
|
|
|
1360
1111
|
message = BaseMessage.make_user_message(
|
|
1361
1112
|
role_name='user',
|
|
1362
1113
|
content=prompt,
|
|
1363
1114
|
)
|
|
1364
|
-
|
|
1115
|
+
self.web_agent.reset() # Reset before step
|
|
1365
1116
|
resp = self.web_agent.step(message)
|
|
1366
1117
|
return resp.msgs[0].content
|
|
1367
1118
|
|
|
1368
1119
|
def _task_planning(self, task_prompt: str, start_url: str) -> str:
|
|
1369
1120
|
r"""Plan the task based on the given task prompt."""
|
|
1370
1121
|
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
<task>{task_prompt}</task>
|
|
1375
|
-
According to the problem above, if we use browser interaction, what is the general process of the interaction after visiting the webpage `{start_url}`?
|
|
1376
|
-
|
|
1377
|
-
Please note that it can be viewed as Partially Observable MDP. Do not over-confident about your plan.
|
|
1378
|
-
Please first restate the task in detail, and then provide a detailed plan to solve the task.
|
|
1379
|
-
"""
|
|
1380
|
-
# Here are some tips for you: Please note that we can only see a part of the full page because of the limited viewport after an action. Thus, do not forget to use methods like `scroll_up()` and `scroll_down()` to check the full content of the webpage, because the answer or next key step may be hidden in the content below.
|
|
1122
|
+
planning_prompt = TASK_PLANNING_PROMPT_TEMPLATE.format(
|
|
1123
|
+
task_prompt=task_prompt, start_url=start_url
|
|
1124
|
+
)
|
|
1381
1125
|
|
|
1382
1126
|
message = BaseMessage.make_user_message(
|
|
1383
1127
|
role_name='user', content=planning_prompt
|
|
1384
1128
|
)
|
|
1385
|
-
|
|
1129
|
+
self.planning_agent.reset() # Reset before step
|
|
1386
1130
|
resp = self.planning_agent.step(message)
|
|
1387
1131
|
return resp.msgs[0].content
|
|
1388
1132
|
|
|
@@ -1396,35 +1140,26 @@ Please first restate the task in detail, and then provide a detailed plan to sol
|
|
|
1396
1140
|
detailed_plan (str): The detailed plan to replan.
|
|
1397
1141
|
|
|
1398
1142
|
Returns:
|
|
1399
|
-
Tuple[bool, str]: A tuple containing a boolean indicating
|
|
1143
|
+
Tuple[bool, str]: A tuple containing a boolean indicating
|
|
1144
|
+
whether the task needs to be replanned, and the replanned schema.
|
|
1400
1145
|
"""
|
|
1401
1146
|
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
In order to solve the task, we made a detailed plan previously. Here is the detailed plan:
|
|
1409
|
-
<detailed plan>{detailed_plan}</detailed plan>
|
|
1410
|
-
|
|
1411
|
-
According to the task above, we have made a series of observations, reasonings, and actions. Here are the latest {self.history_window} trajectory (at most) we have taken:
|
|
1412
|
-
<history>{self.history[-self.history_window :]}</history>
|
|
1413
|
-
|
|
1414
|
-
However, the task is not completed yet. As the task is partially observable, we may need to replan the task based on the current state of the browser if necessary.
|
|
1415
|
-
Now please carefully examine the current task planning schema, and our history actions, and then judge whether the task needs to be fundamentally replanned. If so, please provide a detailed replanned schema (including the restated overall task).
|
|
1416
|
-
|
|
1417
|
-
Your output should be in json format, including the following fields:
|
|
1418
|
-
- `if_need_replan`: bool, A boolean value indicating whether the task needs to be fundamentally replanned.
|
|
1419
|
-
- `replanned_schema`: str, The replanned schema for the task, which should not be changed too much compared with the original one. If the task does not need to be replanned, the value should be an empty string.
|
|
1420
|
-
"""
|
|
1147
|
+
replanning_prompt = TASK_REPLANNING_PROMPT_TEMPLATE.format(
|
|
1148
|
+
task_prompt=task_prompt,
|
|
1149
|
+
detailed_plan=detailed_plan,
|
|
1150
|
+
history_window=self.history_window,
|
|
1151
|
+
history=self.history[-self.history_window :],
|
|
1152
|
+
)
|
|
1421
1153
|
# Reset the history message of planning_agent.
|
|
1422
1154
|
self.planning_agent.reset()
|
|
1423
1155
|
resp = self.planning_agent.step(replanning_prompt)
|
|
1424
|
-
resp_dict = _parse_json_output(
|
|
1156
|
+
resp_dict = _parse_json_output(
|
|
1157
|
+
resp.msgs[0].content, logger
|
|
1158
|
+
) # Pass logger
|
|
1425
1159
|
|
|
1426
|
-
|
|
1427
|
-
|
|
1160
|
+
if_need_replan_eval = resp_dict.get("if_need_replan", False)
|
|
1161
|
+
if_need_replan = cast(bool, if_need_replan_eval) # Ensure bool
|
|
1162
|
+
replanned_schema: str = resp_dict.get("replanned_schema", "")
|
|
1428
1163
|
|
|
1429
1164
|
if if_need_replan:
|
|
1430
1165
|
return True, replanned_schema
|
|
@@ -1463,10 +1198,10 @@ Your output should be in json format, including the following fields:
|
|
|
1463
1198
|
logger.debug(f"Observation: {observation}")
|
|
1464
1199
|
logger.debug(f"Reasoning: {reasoning}")
|
|
1465
1200
|
logger.debug(f"Action code: {action_code}")
|
|
1466
|
-
|
|
1201
|
+
trajectory_info: Dict[str, Any]
|
|
1467
1202
|
if "stop" in action_code:
|
|
1468
1203
|
task_completed = True
|
|
1469
|
-
trajectory_info = {
|
|
1204
|
+
trajectory_info = { # Typed trajectory_info
|
|
1470
1205
|
"round": i,
|
|
1471
1206
|
"observation": observation,
|
|
1472
1207
|
"thought": reasoning,
|
|
@@ -1483,7 +1218,7 @@ Your output should be in json format, including the following fields:
|
|
|
1483
1218
|
if not success:
|
|
1484
1219
|
logger.warning(f"Error while executing the action: {info}")
|
|
1485
1220
|
|
|
1486
|
-
trajectory_info = {
|
|
1221
|
+
trajectory_info = { # Typed trajectory_info
|
|
1487
1222
|
"round": i,
|
|
1488
1223
|
"observation": observation,
|
|
1489
1224
|
"thought": reasoning,
|
|
@@ -1502,15 +1237,20 @@ Your output should be in json format, including the following fields:
|
|
|
1502
1237
|
detailed_plan = replanned_schema
|
|
1503
1238
|
logger.debug(f"Replanned schema: {replanned_schema}")
|
|
1504
1239
|
|
|
1240
|
+
simulation_result: str
|
|
1505
1241
|
if not task_completed:
|
|
1506
1242
|
simulation_result = f"""
|
|
1507
|
-
The task is not completed within the round limit. Please
|
|
1243
|
+
The task is not completed within the round limit. Please
|
|
1244
|
+
check the last round {self.history_window} information to
|
|
1245
|
+
see if there is any useful information:
|
|
1508
1246
|
<history>{self.history[-self.history_window :]}</history>
|
|
1509
1247
|
"""
|
|
1510
1248
|
|
|
1511
1249
|
else:
|
|
1512
1250
|
simulation_result = self._get_final_answer(task_prompt)
|
|
1513
1251
|
|
|
1252
|
+
self.browser.close() # Close browser after task completion or limit
|
|
1253
|
+
# reached
|
|
1514
1254
|
return simulation_result
|
|
1515
1255
|
|
|
1516
1256
|
def get_tools(self) -> List[FunctionTool]:
|