camel-ai 0.2.59__py3-none-any.whl → 0.2.82__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +3 -3
- camel/agents/__init__.py +2 -2
- camel/agents/_types.py +9 -4
- camel/agents/_utils.py +40 -2
- camel/agents/base.py +2 -2
- camel/agents/chat_agent.py +5012 -902
- camel/agents/critic_agent.py +2 -2
- camel/agents/deductive_reasoner_agent.py +56 -56
- camel/agents/embodied_agent.py +2 -2
- camel/agents/knowledge_graph_agent.py +20 -20
- camel/agents/mcp_agent.py +39 -36
- camel/agents/multi_hop_generator_agent.py +3 -3
- camel/agents/programmed_agent_instruction.py +2 -2
- camel/agents/repo_agent.py +4 -3
- camel/agents/role_assignment_agent.py +2 -2
- camel/agents/search_agent.py +2 -2
- camel/agents/task_agent.py +2 -2
- camel/agents/tool_agents/__init__.py +2 -2
- camel/agents/tool_agents/base.py +2 -2
- camel/agents/tool_agents/hugging_face_tool_agent.py +3 -3
- camel/benchmarks/__init__.py +2 -2
- camel/benchmarks/apibank.py +5 -5
- camel/benchmarks/apibench.py +2 -2
- camel/benchmarks/base.py +2 -2
- camel/benchmarks/browsecomp.py +44 -33
- camel/benchmarks/gaia.py +17 -13
- camel/benchmarks/mock_website/README.md +94 -0
- camel/benchmarks/mock_website/mock_web.py +299 -0
- camel/benchmarks/mock_website/requirements.txt +3 -0
- camel/benchmarks/mock_website/shopping_mall/app.py +465 -0
- camel/benchmarks/mock_website/task.json +104 -0
- camel/benchmarks/nexus.py +3 -3
- camel/benchmarks/ragbench.py +2 -2
- camel/bots/__init__.py +2 -2
- camel/bots/discord/__init__.py +2 -2
- camel/bots/discord/discord_app.py +2 -2
- camel/bots/discord/discord_installation.py +2 -2
- camel/bots/discord/discord_store.py +3 -3
- camel/bots/slack/__init__.py +2 -2
- camel/bots/slack/models.py +4 -4
- camel/bots/slack/slack_app.py +2 -2
- camel/bots/telegram_bot.py +2 -2
- camel/configs/__init__.py +26 -2
- camel/configs/aihubmix_config.py +90 -0
- camel/configs/aiml_config.py +2 -2
- camel/configs/amd_config.py +70 -0
- camel/configs/anthropic_config.py +8 -7
- camel/configs/base_config.py +2 -2
- camel/configs/bedrock_config.py +5 -3
- camel/configs/cerebras_config.py +98 -0
- camel/configs/cohere_config.py +3 -3
- camel/configs/cometapi_config.py +106 -0
- camel/configs/crynux_config.py +94 -0
- camel/configs/deepseek_config.py +9 -8
- camel/configs/gemini_config.py +6 -4
- camel/configs/groq_config.py +6 -4
- camel/configs/internlm_config.py +6 -4
- camel/configs/litellm_config.py +2 -2
- camel/configs/lmstudio_config.py +6 -4
- camel/configs/minimax_config.py +95 -0
- camel/configs/mistral_config.py +3 -3
- camel/configs/modelscope_config.py +5 -3
- camel/configs/moonshot_config.py +2 -2
- camel/configs/nebius_config.py +105 -0
- camel/configs/netmind_config.py +2 -2
- camel/configs/novita_config.py +2 -2
- camel/configs/nvidia_config.py +2 -2
- camel/configs/ollama_config.py +2 -2
- camel/configs/openai_config.py +8 -3
- camel/configs/openrouter_config.py +6 -4
- camel/configs/ppio_config.py +2 -2
- camel/configs/qianfan_config.py +85 -0
- camel/configs/qwen_config.py +2 -2
- camel/configs/reka_config.py +3 -3
- camel/configs/samba_config.py +8 -6
- camel/configs/sglang_config.py +2 -2
- camel/configs/siliconflow_config.py +2 -2
- camel/configs/togetherai_config.py +2 -2
- camel/configs/vllm_config.py +4 -2
- camel/configs/watsonx_config.py +2 -2
- camel/configs/yi_config.py +6 -4
- camel/configs/zhipuai_config.py +6 -4
- camel/{data_collector → data_collectors}/__init__.py +2 -2
- camel/{data_collector → data_collectors}/alpaca_collector.py +19 -10
- camel/{data_collector → data_collectors}/base.py +2 -2
- camel/{data_collector → data_collectors}/sharegpt_collector.py +3 -3
- camel/datagen/__init__.py +2 -2
- camel/datagen/cot_datagen.py +32 -37
- camel/datagen/evol_instruct/__init__.py +2 -2
- camel/datagen/evol_instruct/evol_instruct.py +2 -2
- camel/datagen/evol_instruct/scorer.py +24 -25
- camel/datagen/evol_instruct/templates.py +48 -48
- camel/datagen/self_improving_cot.py +5 -5
- camel/datagen/self_instruct/__init__.py +2 -2
- camel/datagen/self_instruct/filter/__init__.py +2 -2
- camel/datagen/self_instruct/filter/filter_function.py +2 -2
- camel/datagen/self_instruct/filter/filter_registry.py +2 -2
- camel/datagen/self_instruct/filter/instruction_filter.py +2 -2
- camel/datagen/self_instruct/self_instruct.py +2 -2
- camel/datagen/self_instruct/templates.py +47 -47
- camel/datagen/source2synth/__init__.py +2 -2
- camel/datagen/source2synth/data_processor.py +2 -2
- camel/datagen/source2synth/models.py +2 -2
- camel/datagen/source2synth/user_data_processor_config.py +2 -2
- camel/datahubs/__init__.py +2 -2
- camel/datahubs/base.py +2 -2
- camel/datahubs/huggingface.py +2 -2
- camel/datahubs/models.py +2 -2
- camel/datasets/__init__.py +2 -2
- camel/datasets/base_generator.py +41 -12
- camel/datasets/few_shot_generator.py +18 -18
- camel/datasets/models.py +3 -3
- camel/datasets/self_instruct_generator.py +2 -2
- camel/datasets/static_dataset.py +152 -2
- camel/embeddings/__init__.py +2 -2
- camel/embeddings/azure_embedding.py +2 -2
- camel/embeddings/base.py +2 -2
- camel/embeddings/gemini_embedding.py +2 -2
- camel/embeddings/jina_embedding.py +10 -3
- camel/embeddings/mistral_embedding.py +2 -2
- camel/embeddings/openai_compatible_embedding.py +2 -2
- camel/embeddings/openai_embedding.py +2 -2
- camel/embeddings/sentence_transformers_embeddings.py +4 -4
- camel/embeddings/together_embedding.py +2 -2
- camel/embeddings/vlm_embedding.py +11 -4
- camel/environments/__init__.py +14 -2
- camel/environments/models.py +2 -2
- camel/environments/multi_step.py +2 -2
- camel/environments/rlcards_env.py +860 -0
- camel/environments/single_step.py +30 -5
- camel/environments/tic_tac_toe.py +3 -3
- camel/extractors/__init__.py +2 -2
- camel/extractors/base.py +2 -2
- camel/extractors/python_strategies.py +2 -2
- camel/generators.py +2 -2
- camel/human.py +2 -2
- camel/interpreters/__init__.py +4 -2
- camel/interpreters/base.py +16 -3
- camel/interpreters/docker/Dockerfile +53 -7
- camel/interpreters/docker_interpreter.py +70 -11
- camel/interpreters/e2b_interpreter.py +59 -11
- camel/interpreters/internal_python_interpreter.py +81 -4
- camel/interpreters/interpreter_error.py +2 -2
- camel/interpreters/ipython_interpreter.py +23 -5
- camel/interpreters/microsandbox_interpreter.py +395 -0
- camel/interpreters/subprocess_interpreter.py +36 -4
- camel/loaders/__init__.py +17 -5
- camel/loaders/apify_reader.py +2 -2
- camel/loaders/base_io.py +2 -2
- camel/loaders/base_loader.py +85 -0
- camel/loaders/chunkr_reader.py +128 -93
- camel/loaders/crawl4ai_reader.py +2 -2
- camel/loaders/firecrawl_reader.py +6 -6
- camel/loaders/jina_url_reader.py +2 -2
- camel/loaders/markitdown.py +2 -2
- camel/loaders/mineru_extractor.py +2 -2
- camel/loaders/mistral_reader.py +148 -0
- camel/loaders/scrapegraph_reader.py +2 -2
- camel/loaders/unstructured_io.py +2 -2
- camel/logger.py +5 -5
- camel/memories/__init__.py +2 -2
- camel/memories/agent_memories.py +86 -3
- camel/memories/base.py +36 -2
- camel/memories/blocks/__init__.py +2 -2
- camel/memories/blocks/chat_history_block.py +126 -9
- camel/memories/blocks/vectordb_block.py +10 -3
- camel/memories/context_creators/__init__.py +2 -2
- camel/memories/context_creators/score_based.py +31 -239
- camel/memories/records.py +98 -13
- camel/messages/__init__.py +2 -2
- camel/messages/base.py +193 -46
- camel/messages/conversion/__init__.py +2 -2
- camel/messages/conversion/alpaca.py +2 -2
- camel/messages/conversion/conversation_models.py +2 -2
- camel/messages/conversion/sharegpt/__init__.py +2 -2
- camel/messages/conversion/sharegpt/function_call_formatter.py +2 -2
- camel/messages/conversion/sharegpt/hermes/__init__.py +2 -2
- camel/messages/conversion/sharegpt/hermes/hermes_function_formatter.py +2 -2
- camel/messages/func_message.py +54 -17
- camel/models/__init__.py +18 -2
- camel/models/_utils.py +3 -3
- camel/models/aihubmix_model.py +83 -0
- camel/models/aiml_model.py +11 -18
- camel/models/amd_model.py +101 -0
- camel/models/anthropic_model.py +127 -20
- camel/models/aws_bedrock_model.py +12 -35
- camel/models/azure_openai_model.py +263 -63
- camel/models/base_audio_model.py +5 -3
- camel/models/base_model.py +195 -26
- camel/models/cerebras_model.py +83 -0
- camel/models/cohere_model.py +81 -21
- camel/models/cometapi_model.py +83 -0
- camel/models/crynux_model.py +87 -0
- camel/models/deepseek_model.py +61 -59
- camel/models/fish_audio_model.py +8 -2
- camel/models/gemini_model.py +439 -30
- camel/models/groq_model.py +11 -19
- camel/models/internlm_model.py +11 -18
- camel/models/litellm_model.py +94 -34
- camel/models/lmstudio_model.py +17 -20
- camel/models/minimax_model.py +83 -0
- camel/models/mistral_model.py +84 -19
- camel/models/model_factory.py +49 -6
- camel/models/model_manager.py +33 -11
- camel/models/modelscope_model.py +13 -193
- camel/models/moonshot_model.py +195 -21
- camel/models/nebius_model.py +83 -0
- camel/models/nemotron_model.py +19 -9
- camel/models/netmind_model.py +11 -18
- camel/models/novita_model.py +11 -18
- camel/models/nvidia_model.py +11 -18
- camel/models/ollama_model.py +14 -21
- camel/models/openai_audio_models.py +2 -2
- camel/models/openai_compatible_model.py +234 -27
- camel/models/openai_model.py +255 -39
- camel/models/openrouter_model.py +11 -19
- camel/models/ppio_model.py +11 -18
- camel/models/qianfan_model.py +89 -0
- camel/models/qwen_model.py +13 -193
- camel/models/reka_model.py +90 -21
- camel/models/reward/__init__.py +2 -2
- camel/models/reward/base_reward_model.py +2 -2
- camel/models/reward/evaluator.py +2 -2
- camel/models/reward/nemotron_model.py +2 -2
- camel/models/reward/skywork_model.py +2 -2
- camel/models/samba_model.py +117 -49
- camel/models/sglang_model.py +162 -42
- camel/models/siliconflow_model.py +12 -35
- camel/models/stub_model.py +10 -7
- camel/models/togetherai_model.py +11 -18
- camel/models/vllm_model.py +10 -18
- camel/models/volcano_model.py +16 -20
- camel/models/watsonx_model.py +69 -19
- camel/models/yi_model.py +11 -18
- camel/models/zhipuai_model.py +70 -18
- camel/parsers/__init__.py +18 -0
- camel/parsers/mcp_tool_call_parser.py +176 -0
- camel/personas/__init__.py +2 -2
- camel/personas/persona.py +2 -2
- camel/personas/persona_hub.py +2 -2
- camel/prompts/__init__.py +2 -2
- camel/prompts/ai_society.py +2 -2
- camel/prompts/base.py +2 -2
- camel/prompts/code.py +2 -2
- camel/prompts/evaluation.py +2 -2
- camel/prompts/generate_text_embedding_data.py +2 -2
- camel/prompts/image_craft.py +2 -2
- camel/prompts/misalignment.py +2 -2
- camel/prompts/multi_condition_image_craft.py +2 -2
- camel/prompts/object_recognition.py +2 -2
- camel/prompts/persona_hub.py +3 -3
- camel/prompts/prompt_templates.py +2 -2
- camel/prompts/role_description_prompt_template.py +2 -2
- camel/prompts/solution_extraction.py +8 -8
- camel/prompts/task_prompt_template.py +2 -2
- camel/prompts/translation.py +2 -2
- camel/prompts/video_description_prompt.py +3 -3
- camel/responses/__init__.py +2 -2
- camel/responses/agent_responses.py +2 -2
- camel/retrievers/__init__.py +2 -2
- camel/retrievers/auto_retriever.py +23 -3
- camel/retrievers/base.py +2 -2
- camel/retrievers/bm25_retriever.py +3 -4
- camel/retrievers/cohere_rerank_retriever.py +2 -2
- camel/retrievers/hybrid_retrival.py +4 -4
- camel/retrievers/vector_retriever.py +2 -2
- camel/runtimes/Dockerfile.multi-toolkit +90 -0
- camel/{runtime → runtimes}/__init__.py +2 -2
- camel/runtimes/api.py +153 -0
- camel/{runtime → runtimes}/base.py +2 -2
- camel/{runtime → runtimes}/configs.py +13 -13
- camel/{runtime → runtimes}/daytona_runtime.py +18 -19
- camel/{runtime → runtimes}/docker_runtime.py +13 -13
- camel/{runtime → runtimes}/llm_guard_runtime.py +28 -28
- camel/{runtime → runtimes}/remote_http_runtime.py +12 -12
- camel/{runtime → runtimes}/ubuntu_docker_runtime.py +3 -3
- camel/{runtime → runtimes}/utils/__init__.py +2 -2
- camel/{runtime → runtimes}/utils/function_risk_toolkit.py +2 -2
- camel/{runtime → runtimes}/utils/ignore_risk_toolkit.py +2 -2
- camel/schemas/__init__.py +2 -2
- camel/schemas/base.py +2 -2
- camel/schemas/openai_converter.py +3 -3
- camel/schemas/outlines_converter.py +2 -2
- camel/services/agent_openapi_server.py +380 -0
- camel/societies/__init__.py +4 -2
- camel/societies/babyagi_playing.py +2 -2
- camel/societies/role_playing.py +201 -80
- camel/societies/workforce/__init__.py +10 -3
- camel/societies/workforce/base.py +9 -5
- camel/societies/workforce/events.py +143 -0
- camel/societies/workforce/prompts.py +258 -33
- camel/societies/workforce/role_playing_worker.py +95 -30
- camel/societies/workforce/single_agent_worker.py +659 -30
- camel/societies/workforce/structured_output_handler.py +512 -0
- camel/societies/workforce/task_channel.py +182 -38
- camel/societies/workforce/utils.py +784 -18
- camel/societies/workforce/worker.py +96 -28
- camel/societies/workforce/workflow_memory_manager.py +1746 -0
- camel/societies/workforce/workforce.py +5730 -366
- camel/societies/workforce/workforce_callback.py +103 -0
- camel/societies/workforce/workforce_logger.py +647 -0
- camel/societies/workforce/workforce_metrics.py +33 -0
- camel/storages/__init__.py +10 -2
- camel/storages/graph_storages/__init__.py +2 -2
- camel/storages/graph_storages/base.py +2 -2
- camel/storages/graph_storages/graph_element.py +2 -2
- camel/storages/graph_storages/nebula_graph.py +4 -4
- camel/storages/graph_storages/neo4j_graph.py +7 -7
- camel/storages/key_value_storages/__init__.py +2 -2
- camel/storages/key_value_storages/base.py +2 -2
- camel/storages/key_value_storages/in_memory.py +2 -2
- camel/storages/key_value_storages/json.py +17 -4
- camel/storages/key_value_storages/mem0_cloud.py +50 -49
- camel/storages/key_value_storages/redis.py +2 -2
- camel/storages/object_storages/__init__.py +2 -2
- camel/storages/object_storages/amazon_s3.py +2 -2
- camel/storages/object_storages/azure_blob.py +2 -2
- camel/storages/object_storages/base.py +2 -2
- camel/storages/object_storages/google_cloud.py +3 -3
- camel/storages/vectordb_storages/__init__.py +12 -2
- camel/storages/vectordb_storages/base.py +2 -2
- camel/storages/vectordb_storages/chroma.py +731 -0
- camel/storages/vectordb_storages/faiss.py +712 -0
- camel/storages/vectordb_storages/milvus.py +2 -2
- camel/storages/vectordb_storages/oceanbase.py +16 -17
- camel/storages/vectordb_storages/pgvector.py +349 -0
- camel/storages/vectordb_storages/qdrant.py +6 -6
- camel/storages/vectordb_storages/surreal.py +372 -0
- camel/storages/vectordb_storages/tidb.py +11 -8
- camel/storages/vectordb_storages/weaviate.py +714 -0
- camel/tasks/__init__.py +2 -2
- camel/tasks/task.py +366 -27
- camel/tasks/task_prompt.py +3 -3
- camel/terminators/__init__.py +2 -2
- camel/terminators/base.py +2 -2
- camel/terminators/response_terminator.py +2 -2
- camel/terminators/token_limit_terminator.py +2 -2
- camel/toolkits/__init__.py +58 -10
- camel/toolkits/aci_toolkit.py +66 -21
- camel/toolkits/arxiv_toolkit.py +8 -8
- camel/toolkits/ask_news_toolkit.py +2 -2
- camel/toolkits/async_browser_toolkit.py +174 -575
- camel/toolkits/audio_analysis_toolkit.py +3 -3
- camel/toolkits/base.py +65 -7
- camel/toolkits/bohrium_toolkit.py +318 -0
- camel/toolkits/browser_toolkit.py +306 -566
- camel/toolkits/browser_toolkit_commons.py +568 -0
- camel/toolkits/code_execution.py +67 -11
- camel/toolkits/context_summarizer_toolkit.py +684 -0
- camel/toolkits/craw4ai_toolkit.py +93 -0
- camel/toolkits/dappier_toolkit.py +12 -8
- camel/toolkits/data_commons_toolkit.py +2 -2
- camel/toolkits/dingtalk.py +1135 -0
- camel/toolkits/earth_science_toolkit.py +5367 -0
- camel/toolkits/edgeone_pages_mcp_toolkit.py +49 -0
- camel/toolkits/excel_toolkit.py +910 -70
- camel/toolkits/file_toolkit.py +1402 -0
- camel/toolkits/function_tool.py +128 -20
- camel/toolkits/github_toolkit.py +148 -43
- camel/toolkits/gmail_toolkit.py +1839 -0
- camel/toolkits/google_calendar_toolkit.py +40 -6
- camel/toolkits/google_drive_mcp_toolkit.py +54 -0
- camel/toolkits/google_maps_toolkit.py +2 -2
- camel/toolkits/google_scholar_toolkit.py +2 -2
- camel/toolkits/human_toolkit.py +36 -12
- camel/toolkits/hybrid_browser_toolkit/__init__.py +18 -0
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +185 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +246 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +1973 -0
- camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +4589 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package.json +33 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-scripts.js +125 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +1929 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +233 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +589 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/index.ts +7 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +129 -0
- camel/toolkits/hybrid_browser_toolkit/ts/tsconfig.json +27 -0
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +319 -0
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +1037 -0
- camel/toolkits/hybrid_browser_toolkit_py/__init__.py +17 -0
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +575 -0
- camel/toolkits/hybrid_browser_toolkit_py/agent.py +311 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +787 -0
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +490 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +2390 -0
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +233 -0
- camel/toolkits/hybrid_browser_toolkit_py/stealth_script.js +0 -0
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +1043 -0
- camel/toolkits/image_analysis_toolkit.py +3 -3
- camel/toolkits/image_generation_toolkit.py +390 -0
- camel/toolkits/jina_reranker_toolkit.py +195 -79
- camel/toolkits/klavis_toolkit.py +7 -3
- camel/toolkits/linkedin_toolkit.py +2 -2
- camel/toolkits/markitdown_toolkit.py +104 -0
- camel/toolkits/math_toolkit.py +66 -12
- camel/toolkits/mcp_toolkit.py +841 -600
- camel/toolkits/memory_toolkit.py +7 -3
- camel/toolkits/meshy_toolkit.py +2 -2
- camel/toolkits/message_agent_toolkit.py +608 -0
- camel/toolkits/message_integration.py +724 -0
- camel/toolkits/mineru_toolkit.py +2 -2
- camel/toolkits/minimax_mcp_toolkit.py +195 -0
- camel/toolkits/networkx_toolkit.py +2 -2
- camel/toolkits/note_taking_toolkit.py +277 -0
- camel/toolkits/notion_mcp_toolkit.py +224 -0
- camel/toolkits/notion_toolkit.py +2 -2
- camel/toolkits/open_api_specs/biztoc/__init__.py +2 -2
- camel/toolkits/open_api_specs/biztoc/ai-plugin.json +1 -1
- camel/toolkits/open_api_specs/coursera/__init__.py +2 -2
- camel/toolkits/open_api_specs/create_qr_code/__init__.py +2 -2
- camel/toolkits/open_api_specs/klarna/__init__.py +2 -2
- camel/toolkits/open_api_specs/nasa_apod/__init__.py +2 -2
- camel/toolkits/open_api_specs/outschool/__init__.py +2 -2
- camel/toolkits/open_api_specs/outschool/ai-plugin.json +1 -1
- camel/toolkits/open_api_specs/outschool/openapi.yaml +1 -1
- camel/toolkits/open_api_specs/outschool/paths/__init__.py +2 -2
- camel/toolkits/open_api_specs/outschool/paths/get_classes.py +2 -2
- camel/toolkits/open_api_specs/outschool/paths/search_teachers.py +2 -2
- camel/toolkits/open_api_specs/security_config.py +2 -2
- camel/toolkits/open_api_specs/speak/__init__.py +2 -2
- camel/toolkits/open_api_specs/web_scraper/__init__.py +2 -2
- camel/toolkits/open_api_specs/web_scraper/ai-plugin.json +1 -1
- camel/toolkits/open_api_specs/web_scraper/paths/__init__.py +2 -2
- camel/toolkits/open_api_specs/web_scraper/paths/scraper.py +2 -2
- camel/toolkits/open_api_toolkit.py +2 -2
- camel/toolkits/openbb_toolkit.py +7 -3
- camel/toolkits/origene_mcp_toolkit.py +56 -0
- camel/toolkits/page_script.js +86 -74
- camel/toolkits/playwright_mcp_toolkit.py +27 -32
- camel/toolkits/pptx_toolkit.py +790 -0
- camel/toolkits/pubmed_toolkit.py +2 -2
- camel/toolkits/pulse_mcp_search_toolkit.py +2 -2
- camel/toolkits/pyautogui_toolkit.py +2 -2
- camel/toolkits/reddit_toolkit.py +2 -2
- camel/toolkits/resend_toolkit.py +168 -0
- camel/toolkits/retrieval_toolkit.py +2 -2
- camel/toolkits/screenshot_toolkit.py +213 -0
- camel/toolkits/search_toolkit.py +539 -146
- camel/toolkits/searxng_toolkit.py +2 -2
- camel/toolkits/semantic_scholar_toolkit.py +2 -2
- camel/toolkits/slack_toolkit.py +108 -58
- camel/toolkits/sql_toolkit.py +712 -0
- camel/toolkits/stripe_toolkit.py +2 -2
- camel/toolkits/sympy_toolkit.py +3 -3
- camel/toolkits/task_planning_toolkit.py +134 -0
- camel/toolkits/terminal_toolkit/__init__.py +18 -0
- camel/toolkits/terminal_toolkit/terminal_toolkit.py +1070 -0
- camel/toolkits/terminal_toolkit/utils.py +532 -0
- camel/toolkits/thinking_toolkit.py +3 -3
- camel/toolkits/twitter_toolkit.py +8 -3
- camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
- camel/toolkits/video_analysis_toolkit.py +112 -29
- camel/toolkits/video_download_toolkit.py +22 -16
- camel/toolkits/weather_toolkit.py +2 -2
- camel/toolkits/web_deploy_toolkit.py +1219 -0
- camel/toolkits/wechat_official_toolkit.py +483 -0
- camel/toolkits/whatsapp_toolkit.py +2 -2
- camel/toolkits/wolfram_alpha_toolkit.py +53 -25
- camel/toolkits/zapier_toolkit.py +7 -3
- camel/types/__init__.py +4 -4
- camel/types/agents/__init__.py +2 -2
- camel/types/agents/tool_calling_record.py +6 -3
- camel/types/enums.py +454 -35
- camel/types/mcp_registries.py +2 -2
- camel/types/openai_types.py +4 -4
- camel/types/unified_model_type.py +43 -6
- camel/utils/__init__.py +20 -2
- camel/utils/async_func.py +2 -2
- camel/utils/chunker/__init__.py +2 -2
- camel/utils/chunker/base.py +2 -2
- camel/utils/chunker/code_chunker.py +2 -2
- camel/utils/chunker/uio_chunker.py +2 -2
- camel/utils/commons.py +65 -7
- camel/utils/constants.py +5 -2
- camel/utils/context_utils.py +1134 -0
- camel/utils/deduplication.py +2 -2
- camel/utils/filename.py +2 -2
- camel/utils/langfuse.py +258 -0
- camel/utils/mcp.py +140 -6
- camel/utils/mcp_client.py +1056 -0
- camel/utils/message_summarizer.py +148 -0
- camel/utils/response_format.py +2 -2
- camel/utils/token_counting.py +45 -22
- camel/utils/tool_result.py +44 -0
- camel/verifiers/__init__.py +2 -2
- camel/verifiers/base.py +2 -2
- camel/verifiers/math_verifier.py +2 -2
- camel/verifiers/models.py +2 -2
- camel/verifiers/physics_verifier.py +2 -2
- camel/verifiers/python_verifier.py +2 -2
- {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/METADATA +349 -108
- camel_ai-0.2.82.dist-info/RECORD +507 -0
- {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/WHEEL +1 -1
- {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/licenses/LICENSE +1 -1
- camel/loaders/pandas_reader.py +0 -368
- camel/runtime/api.py +0 -97
- camel/toolkits/dalle_toolkit.py +0 -171
- camel/toolkits/file_write_toolkit.py +0 -395
- camel/toolkits/openai_agent_toolkit.py +0 -135
- camel/toolkits/terminal_toolkit.py +0 -1037
- camel_ai-0.2.59.dist-info/RECORD +0 -410
|
@@ -0,0 +1,1973 @@
|
|
|
1
|
+
# ========= Copyright 2023-2025 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2025 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
# =========
|
|
15
|
+
|
|
16
|
+
import contextlib
|
|
17
|
+
import time
|
|
18
|
+
from typing import (
|
|
19
|
+
Any,
|
|
20
|
+
Callable,
|
|
21
|
+
ClassVar,
|
|
22
|
+
Dict,
|
|
23
|
+
List,
|
|
24
|
+
Optional,
|
|
25
|
+
TypedDict,
|
|
26
|
+
cast,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
from camel.logger import get_logger
|
|
30
|
+
from camel.messages import BaseMessage
|
|
31
|
+
from camel.toolkits.base import BaseToolkit, RegisteredAgentToolkit
|
|
32
|
+
from camel.toolkits.function_tool import FunctionTool
|
|
33
|
+
from camel.utils.commons import dependencies_required
|
|
34
|
+
|
|
35
|
+
from .config_loader import ConfigLoader
|
|
36
|
+
from .ws_wrapper import WebSocketBrowserWrapper, high_level_action
|
|
37
|
+
|
|
38
|
+
logger = get_logger(__name__)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class SheetCell(TypedDict):
    r"""Typed dictionary describing one cell passed to a sheet input.

    Each entry carries a cell position (``row``, ``col``) together with the
    text associated with that cell.
    """

    # Row position of the cell (indexing base is not defined here).
    row: int
    # Column position of the cell (indexing base is not defined here).
    col: int
    # Text content associated with the cell.
    text: str
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
50
|
+
r"""A hybrid browser toolkit that combines non-visual, DOM-based browser
|
|
51
|
+
automation with visual, screenshot-based capabilities.
|
|
52
|
+
|
|
53
|
+
This toolkit now uses TypeScript implementation with Playwright's
|
|
54
|
+
_snapshotForAI functionality for enhanced AI integration.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
DEFAULT_TOOLS: ClassVar[List[str]] = [
|
|
58
|
+
"browser_open",
|
|
59
|
+
"browser_close",
|
|
60
|
+
"browser_visit_page",
|
|
61
|
+
"browser_back",
|
|
62
|
+
"browser_forward",
|
|
63
|
+
"browser_click",
|
|
64
|
+
"browser_type",
|
|
65
|
+
"browser_switch_tab",
|
|
66
|
+
]
|
|
67
|
+
|
|
68
|
+
ALL_TOOLS: ClassVar[List[str]] = [
|
|
69
|
+
"browser_open",
|
|
70
|
+
"browser_close",
|
|
71
|
+
"browser_visit_page",
|
|
72
|
+
"browser_back",
|
|
73
|
+
"browser_forward",
|
|
74
|
+
"browser_get_page_snapshot",
|
|
75
|
+
"browser_get_som_screenshot",
|
|
76
|
+
"browser_click",
|
|
77
|
+
"browser_type",
|
|
78
|
+
"browser_select",
|
|
79
|
+
"browser_scroll",
|
|
80
|
+
"browser_enter",
|
|
81
|
+
"browser_mouse_control",
|
|
82
|
+
"browser_mouse_drag",
|
|
83
|
+
"browser_press_key",
|
|
84
|
+
"browser_wait_user",
|
|
85
|
+
"browser_switch_tab",
|
|
86
|
+
"browser_close_tab",
|
|
87
|
+
"browser_get_tab_info",
|
|
88
|
+
"browser_console_view",
|
|
89
|
+
"browser_console_exec",
|
|
90
|
+
"browser_sheet_input",
|
|
91
|
+
"browser_sheet_read",
|
|
92
|
+
]
|
|
93
|
+
|
|
94
|
+
def __init__(
    self,
    *,
    headless: bool = True,
    user_data_dir: Optional[str] = None,
    stealth: bool = False,
    cache_dir: Optional[str] = None,
    enabled_tools: Optional[List[str]] = None,
    browser_log_to_file: bool = False,
    log_dir: Optional[str] = None,
    session_id: Optional[str] = None,
    default_start_url: Optional[str] = None,
    default_timeout: Optional[int] = None,
    short_timeout: Optional[int] = None,
    navigation_timeout: Optional[int] = None,
    network_idle_timeout: Optional[int] = None,
    screenshot_timeout: Optional[int] = None,
    page_stability_timeout: Optional[int] = None,
    dom_content_loaded_timeout: Optional[int] = None,
    viewport_limit: bool = False,
    connect_over_cdp: bool = False,
    cdp_url: Optional[str] = None,
    cdp_keep_current_page: bool = False,
    full_visual_mode: bool = False,
) -> None:
    r"""Set up the toolkit configuration; no browser is started here.

    All arguments are keyword-only and are forwarded to
    :obj:`ConfigLoader.from_kwargs`. The values stored on the instance
    are read back from the resulting browser/toolkit configuration, so
    a value of None means "use whatever the configuration loader
    resolves".

    Args:
        headless (bool): Whether to run the browser in headless mode.
            (default: :obj:`True`)
        user_data_dir (Optional[str]): Directory for user data
            persistence. (default: :obj:`None`)
        stealth (bool): Whether to enable stealth mode.
            (default: :obj:`False`)
        cache_dir (Optional[str]): Directory for caching. The effective
            value comes from the configuration loader (earlier
            documentation cites "tmp/" -- confirm against
            ``ConfigLoader``). (default: :obj:`None`)
        enabled_tools (Optional[List[str]]): Names of the tools to
            enable. Every name must appear in ``ALL_TOOLS``. When None,
            a copy of ``DEFAULT_TOOLS`` is used.
            (default: :obj:`None`)
        browser_log_to_file (bool): Whether to log browser actions to
            file. (default: :obj:`False`)
        log_dir (Optional[str]): Custom directory path for log files.
            (default: :obj:`None`)
        session_id (Optional[str]): Session identifier. A falsy value
            resolved by the configuration is replaced by "default".
            (default: :obj:`None`)
        default_start_url (Optional[str]): URL to start with. The
            effective value comes from the configuration loader
            (earlier documentation cites "https://google.com/" --
            confirm against ``ConfigLoader``). Must be left as None
            when ``cdp_keep_current_page`` is enabled.
            (default: :obj:`None`)
        default_timeout (Optional[int]): Default timeout in
            milliseconds. (default: :obj:`None`)
        short_timeout (Optional[int]): Short timeout in milliseconds.
            (default: :obj:`None`)
        navigation_timeout (Optional[int]): Navigation timeout in
            milliseconds. (default: :obj:`None`)
        network_idle_timeout (Optional[int]): Network idle timeout in
            milliseconds. (default: :obj:`None`)
        screenshot_timeout (Optional[int]): Screenshot timeout in
            milliseconds. (default: :obj:`None`)
        page_stability_timeout (Optional[int]): Page stability timeout
            in milliseconds. (default: :obj:`None`)
        dom_content_loaded_timeout (Optional[int]): DOM content loaded
            timeout in milliseconds. (default: :obj:`None`)
        viewport_limit (bool): Whether page snapshots are filtered to
            elements visible in the current viewport. When False, all
            elements on the page are included.
            (default: :obj:`False`)
        connect_over_cdp (bool): Whether to connect to an existing
            browser via Chrome DevTools Protocol.
            (default: :obj:`False`)
        cdp_url (Optional[str]): WebSocket endpoint URL for the CDP
            connection (e.g.,
            'ws://localhost:9222/devtools/browser/...'). Intended for
            use together with ``connect_over_cdp``; this constructor
            does not itself validate that it is set.
            (default: :obj:`None`)
        cdp_keep_current_page (bool): When True and using CDP mode, no
            new page is created and the existing one is used.
            (default: :obj:`False`)
        full_visual_mode (bool): When True, browser actions like click,
            browser_open, visit_page, etc. will not return snapshots.
            (default: :obj:`False`)

    Raises:
        ValueError: If ``default_start_url`` is given while the
            resolved configuration has ``cdp_keep_current_page``
            enabled, or if ``enabled_tools`` contains a name that is
            not listed in ``ALL_TOOLS``.
    """
    super().__init__()
    RegisteredAgentToolkit.__init__(self)

    # Keyword order mirrors the order the loader has always received.
    loader_options = dict(
        headless=headless,
        user_data_dir=user_data_dir,
        stealth=stealth,
        default_start_url=default_start_url,
        default_timeout=default_timeout,
        short_timeout=short_timeout,
        navigation_timeout=navigation_timeout,
        network_idle_timeout=network_idle_timeout,
        screenshot_timeout=screenshot_timeout,
        page_stability_timeout=page_stability_timeout,
        dom_content_loaded_timeout=dom_content_loaded_timeout,
        viewport_limit=viewport_limit,
        cache_dir=cache_dir,
        browser_log_to_file=browser_log_to_file,
        log_dir=log_dir,
        session_id=session_id,
        enabled_tools=enabled_tools,
        connect_over_cdp=connect_over_cdp,
        cdp_url=cdp_url,
        cdp_keep_current_page=cdp_keep_current_page,
        full_visual_mode=full_visual_mode,
    )
    self.config_loader = ConfigLoader.from_kwargs(**loader_options)

    browser_cfg = self.config_loader.get_browser_config()
    toolkit_cfg = self.config_loader.get_toolkit_config()

    # An explicit start URL contradicts "keep the current page": reject
    # the combination before any state is derived from the config.
    start_url_given = default_start_url is not None
    if browser_cfg.cdp_keep_current_page and start_url_given:
        raise ValueError(
            "Cannot use default_start_url with "
            "cdp_keep_current_page=True. When cdp_keep_current_page "
            "is True, the browser will keep the current page and not "
            "navigate to any URL."
        )

    # Browser-level settings.
    self._headless = browser_cfg.headless
    self._user_data_dir = browser_cfg.user_data_dir
    self._stealth = browser_cfg.stealth
    # Toolkit-level settings.
    self._cache_dir = toolkit_cfg.cache_dir
    self._browser_log_to_file = toolkit_cfg.browser_log_to_file
    self._default_start_url = browser_cfg.default_start_url
    self._session_id = toolkit_cfg.session_id or "default"
    self._viewport_limit = browser_cfg.viewport_limit
    self._full_visual_mode = browser_cfg.full_visual_mode

    # Timeouts, as resolved by the browser configuration.
    self._default_timeout = browser_cfg.default_timeout
    self._short_timeout = browser_cfg.short_timeout
    self._navigation_timeout = browser_cfg.navigation_timeout
    self._network_idle_timeout = browser_cfg.network_idle_timeout
    self._screenshot_timeout = browser_cfg.screenshot_timeout
    self._page_stability_timeout = browser_cfg.page_stability_timeout
    self._dom_content_loaded_timeout = (
        browser_cfg.dom_content_loaded_timeout
    )

    if enabled_tools is not None:
        invalid_tools = [
            name for name in enabled_tools if name not in self.ALL_TOOLS
        ]
        if invalid_tools:
            raise ValueError(
                f"Invalid tools specified: {invalid_tools}. "
                f"Available tools: {self.ALL_TOOLS}"
            )
        # Copy so later mutation of the caller's list has no effect.
        self.enabled_tools = enabled_tools.copy()
    else:
        self.enabled_tools = self.DEFAULT_TOOLS.copy()

    logger.info(f"Enabled tools: {self.enabled_tools}")

    # The wrapper is created lazily; only its config is prepared here.
    self._ws_wrapper: Optional[WebSocketBrowserWrapper] = None
    self._ws_config = self.config_loader.to_ws_config()
|
|
247
|
+
|
|
248
|
+
async def _ensure_ws_wrapper(self):
    r"""Lazily create and start the WebSocket browser wrapper.

    Does nothing when a wrapper is already stored on the instance.
    Otherwise a :obj:`WebSocketBrowserWrapper` is built from
    ``self._ws_config``, stored in ``self._ws_wrapper`` and started.

    If ``start()`` fails (or is cancelled), the stored reference is
    cleared again before the exception propagates, so a later call
    attempts a fresh start instead of reusing a wrapper that never
    finished starting.

    Raises:
        Exception: Whatever ``WebSocketBrowserWrapper.start()`` raises.
    """
    if self._ws_wrapper is not None:
        return

    wrapper = WebSocketBrowserWrapper(self._ws_config)
    # Store the wrapper before awaiting (as before) so that overlapping
    # callers do not construct a second one while this one is starting.
    self._ws_wrapper = wrapper
    try:
        await wrapper.start()
    except BaseException:
        # Only undo our own assignment; the attribute may have been
        # replaced or cleared while we were awaiting.
        if self._ws_wrapper is wrapper:
            self._ws_wrapper = None
        raise
|
|
253
|
+
|
|
254
|
+
async def _get_ws_wrapper(self) -> WebSocketBrowserWrapper:
    r"""Return the WebSocket wrapper, starting it first if necessary.

    Returns:
        WebSocketBrowserWrapper: The wrapper stored on this instance.

    Raises:
        RuntimeError: If no wrapper is available after initialization
            was attempted.
    """
    await self._ensure_ws_wrapper()
    wrapper = self._ws_wrapper
    if wrapper is not None:
        return wrapper
    raise RuntimeError("Failed to initialize WebSocket wrapper")
|
|
260
|
+
|
|
261
|
+
def __del__(self):
    r"""Best-effort cleanup of browser resources on garbage collection.

    Nothing is attempted while the interpreter is finalizing, or when the
    current event loop is closed or already running. Otherwise the
    cleanup coroutine is run with a 2 second timeout: in CDP mode
    (``connectOverCdp`` set in the WebSocket config) only the WebSocket
    is disconnected, in every other case the browser is closed.

    Every exception is swallowed on purpose; a finalizer must not raise.
    """
    try:
        import sys

        finalizing = getattr(sys, "is_finalizing", lambda: False)
        if finalizing():
            return

        import asyncio

        # The config may be missing if construction failed part-way.
        is_cdp = False
        if hasattr(self, '_ws_config'):
            is_cdp = self._ws_config.get('connectOverCdp', False)

        try:
            loop = asyncio.get_event_loop()
            if loop.is_closed() or loop.is_running():
                # Cannot drive a coroutine to completion from here.
                return
            try:
                if is_cdp:
                    # CDP: disconnect only
                    cleanup = self.disconnect_websocket()
                else:
                    cleanup = self.browser_close()
                loop.run_until_complete(
                    asyncio.wait_for(cleanup, timeout=2.0)
                )
            except asyncio.TimeoutError:
                pass
        except (RuntimeError, ImportError):
            pass
    except Exception:
        pass
|
|
300
|
+
|
|
301
|
+
@property
def cache_dir(self) -> str:
    r"""Directory path used by this toolkit for cached files.

    Returns:
        str: The value stored in ``self._cache_dir``.
    """
    return self._cache_dir
|
|
305
|
+
|
|
306
|
+
async def browser_open(self) -> Dict[str, Any]:
    r"""Starts a new browser session. This must be the first browser
    action.

    The browser is initialized and pointed at the configured default
    start page. To load a specific URL afterwards, use `visit_page`.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message if the browser could not be opened.
            - "snapshot" (str): A textual snapshot of interactive
              elements (empty on error).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.open_browser(self._default_start_url)
        open_tabs = await wrapper.get_tab_info()

        # Index of the first tab flagged as current; 0 when none is.
        active_index = 0
        for position, tab in enumerate(open_tabs):
            if tab.get("is_current"):
                active_index = position
                break

        outcome.update(
            tabs=open_tabs,
            current_tab=active_index,
            total_tabs=len(open_tabs),
        )
        return outcome
    except Exception as e:
        logger.error(f"Failed to open browser: {e}")
        return dict(
            result=f"Error opening browser: {e}",
            snapshot="",
            tabs=[],
            current_tab=0,
            total_tabs=0,
        )
|
|
352
|
+
|
|
353
|
+
async def browser_close(self) -> str:
    r"""Closes the browser session, releasing all resources.

    Call this at the end of a task for cleanup. If a wrapper is active
    it is stopped and the stored reference is cleared; otherwise nothing
    is done.

    Returns:
        str: A confirmation message, or an error message if stopping the
            wrapper failed.
    """
    try:
        wrapper = self._ws_wrapper
        if wrapper:
            await wrapper.stop()
            self._ws_wrapper = None
    except Exception as e:
        logger.error(f"Failed to close browser: {e}")
        return f"Error closing browser: {e}"
    return "Browser session closed."
|
|
369
|
+
|
|
370
|
+
async def disconnect_websocket(self) -> str:
    r"""Disconnects the WebSocket connection without closing the browser.

    Useful in CDP mode, where the browser should remain open. When the
    WebSocket config has ``connectOverCdp`` set, only the connection is
    dropped; otherwise the wrapper is stopped. In both cases the stored
    wrapper reference is cleared afterwards.

    Returns:
        str: A confirmation message, or an error message on failure.
    """
    try:
        wrapper = self._ws_wrapper
        if wrapper:
            if self._ws_config.get('connectOverCdp', False):
                # CDP: disconnect only
                teardown = wrapper.disconnect_only
            else:
                teardown = wrapper.stop
            await teardown()
            self._ws_wrapper = None
    except Exception as e:
        logger.error(f"Failed to disconnect WebSocket: {e}")
        return f"Error disconnecting WebSocket: {e}"
    return "WebSocket disconnected."
|
|
394
|
+
|
|
395
|
+
async def browser_visit_page(self, url: str) -> Dict[str, Any]:
    r"""Opens a URL in a new browser tab and switches to it.

    Args:
        url (str): The web address to load. This should be a valid and
            existing URL.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message if the page could not be visited.
            - "snapshot" (str): A textual snapshot of the new page
              (empty on error).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the new active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.visit_page(url)
        open_tabs = await wrapper.get_tab_info()

        # Index of the first tab flagged as current; 0 when none is.
        active_index = 0
        for position, tab in enumerate(open_tabs):
            if tab.get("is_current"):
                active_index = position
                break

        outcome.update(
            tabs=open_tabs,
            current_tab=active_index,
            total_tabs=len(open_tabs),
        )
        return outcome
    except Exception as e:
        logger.error(f"Failed to visit page: {e}")
        return dict(
            result=f"Error visiting page: {e}",
            snapshot="",
            tabs=[],
            current_tab=0,
            total_tabs=0,
        )
|
|
440
|
+
|
|
441
|
+
async def browser_back(self) -> Dict[str, Any]:
    r"""Goes back to the previous page in the browser history.

    Equivalent to pressing the browser's "back" button in the currently
    active tab.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message if navigation failed.
            - "snapshot" (str): A textual snapshot of the previous page
              (empty on error).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.back()
        open_tabs = await wrapper.get_tab_info()

        # Index of the first tab flagged as current; 0 when none is.
        active_index = 0
        for position, tab in enumerate(open_tabs):
            if tab.get("is_current"):
                active_index = position
                break

        outcome.update(
            tabs=open_tabs,
            current_tab=active_index,
            total_tabs=len(open_tabs),
        )
        return outcome
    except Exception as e:
        logger.error(f"Failed to navigate back: {e}")
        return dict(
            result=f"Error navigating back: {e}",
            snapshot="",
            tabs=[],
            current_tab=0,
            total_tabs=0,
        )
|
|
485
|
+
|
|
486
|
+
async def browser_forward(self) -> Dict[str, Any]:
    r"""Goes forward to the next page in the browser history.

    Equivalent to pressing the browser's "forward" button in the
    currently active tab.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message if navigation failed.
            - "snapshot" (str): A textual snapshot of the next page
              (empty on error).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.forward()
        open_tabs = await wrapper.get_tab_info()

        # Index of the first tab flagged as current; 0 when none is.
        active_index = 0
        for position, tab in enumerate(open_tabs):
            if tab.get("is_current"):
                active_index = position
                break

        outcome.update(
            tabs=open_tabs,
            current_tab=active_index,
            total_tabs=len(open_tabs),
        )
        return outcome
    except Exception as e:
        logger.error(f"Failed to navigate forward: {e}")
        return dict(
            result=f"Error navigating forward: {e}",
            snapshot="",
            tabs=[],
            current_tab=0,
            total_tabs=0,
        )
|
|
530
|
+
|
|
531
|
+
async def browser_get_page_snapshot(self) -> str:
    r"""Gets a textual snapshot of the page's interactive elements.

    The snapshot lists elements such as buttons, links and inputs, each
    with a unique `ref` ID. Other tools (e.g., `click`, `type`) use that
    ID to interact with a specific element. This tool provides no visual
    information.

    If viewport_limit is enabled, only elements within the current
    viewport will be included in the snapshot.

    Returns:
        str: A formatted string representing the interactive elements and
            their `ref` IDs, or an error message if the snapshot could
            not be captured. For example:
            '- link "Sign In" [ref=1]'
            '- textbox "Username" [ref=2]'
    """
    try:
        wrapper = await self._get_ws_wrapper()
        snapshot = await wrapper.get_page_snapshot(self._viewport_limit)
    except Exception as e:
        logger.error(f"Failed to get page snapshot: {e}")
        return f"Error capturing snapshot: {e}"
    return snapshot
|
|
556
|
+
|
|
557
|
+
@dependencies_required('PIL')
async def browser_get_som_screenshot(
    self,
    read_image: bool = True,
    instruction: Optional[str] = None,
) -> str:
    r"""Captures a screenshot with interactive elements highlighted.

    "SoM" stands for "Set of Marks". This tool takes a screenshot and
    draws boxes around clickable elements, overlaying a `ref` ID on each.
    Use this for a visual understanding of the page, especially when the
    textual snapshot is not enough.

    The first PNG data URL returned by the browser is decoded and written
    to the cache directory. Image analysis is only attempted when a file
    was actually written.

    Args:
        read_image (bool, optional): If `True`, the agent will analyze
            the screenshot. Requires agent to be registered.
            (default: :obj:`True`)
        instruction (Optional[str], optional): A specific question or
            command for the agent regarding the screenshot, used only if
            `read_image` is `True`. For example: "Find the login button."

    Returns:
        str: A confirmation message indicating the screenshot was
            captured, the file path where it was saved, and optionally
            the agent's analysis if `read_image` is `True`. On failure an
            error message is returned instead.
    """
    import base64
    import datetime
    import os
    import urllib.parse

    from camel.utils import sanitize_filename

    try:
        ws_wrapper = await self._get_ws_wrapper()
        result = await ws_wrapper.get_som_screenshot()

        result_text = result.text
        # Stays None until an image has really been written to disk, so
        # the analysis step never opens a file that does not exist.
        file_path = None

        if result.images:
            cache_dir = os.path.abspath(self._cache_dir)
            os.makedirs(cache_dir, exist_ok=True)

            # The file name is derived from the current tab's URL path;
            # any failure to look it up falls back to 'unknown'.
            try:
                page_info = await ws_wrapper.get_tab_info()
                current_tab = next(
                    (tab for tab in page_info if tab.get('is_current')),
                    None,
                )
                url = current_tab['url'] if current_tab else 'unknown'
            except Exception:
                url = 'unknown'

            parsed_url = urllib.parse.urlparse(url)
            url_name = sanitize_filename(
                str(parsed_url.path) or 'homepage', max_length=241
            )
            timestamp = datetime.datetime.now().strftime("%m%d%H%M%S")
            target_path = os.path.join(
                cache_dir, f"{url_name}_{timestamp}_som.png"
            )

            # Persist only the first PNG data URL; other entries are
            # ignored.
            for image_data in result.images:
                if image_data.startswith('data:image/png;base64,'):
                    base64_data = image_data.split(',', 1)[1]

                    image_bytes = base64.b64decode(base64_data)
                    with open(target_path, 'wb') as f:
                        f.write(image_bytes)

                    file_path = target_path
                    logger.info(f"Screenshot saved to: {file_path}")

                    result_text += f" (saved to: {file_path})"
                    break

        if read_image and file_path:
            if self.agent is None:
                logger.error(
                    "Cannot analyze screenshot: No agent registered. "
                    "Please pass this toolkit to ChatAgent via "
                    "toolkits_to_register_agent parameter."
                )
                result_text += (
                    " Error: No agent registered for image analysis. "
                    "Please pass this toolkit to ChatAgent via "
                    "toolkits_to_register_agent parameter."
                )
            else:
                try:
                    from PIL import Image

                    img = Image.open(file_path)
                    inst = instruction if instruction is not None else ""
                    message = BaseMessage.make_user_message(
                        role_name="User",
                        content=inst,
                        image_list=[img],
                    )

                    response = await self.agent.astep(message)
                    agent_response = response.msgs[0].content
                    result_text += f". Agent analysis: {agent_response}"
                except Exception as e:
                    logger.error(f"Error analyzing screenshot: {e}")
                    result_text += f". Error analyzing screenshot: {e}"

        return result_text
    except Exception as e:
        logger.error(f"Failed to get screenshot: {e}")
        return f"Error capturing screenshot: {e}"
|
|
669
|
+
|
|
670
|
+
async def browser_click(self, *, ref: str) -> Dict[str, Any]:
    r"""Performs a click on an element on the page.

    Args:
        ref (str): The `ref` ID of the element to click. This ID is
            obtained from a page snapshot (`get_page_snapshot` or
            `get_som_screenshot`).

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message if the click failed.
            - "snapshot" (str): A textual snapshot of the page after the
              click (empty on error).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
            - "newTabId" / "timing": Copied from the click result when
              it contains them.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        click_result = await wrapper.click(ref)
        open_tabs = await wrapper.get_tab_info()

        summary = click_result.get("result", "")
        snapshot = click_result.get("snapshot", "")

        # Index of the first tab flagged as current; 0 when none is.
        active_index = 0
        for position, tab in enumerate(open_tabs):
            if tab.get("is_current"):
                active_index = position
                break

        response = dict(
            result=summary,
            snapshot=snapshot,
            tabs=open_tabs,
            current_tab=active_index,
            total_tabs=len(open_tabs),
        )

        # Forward the optional fields only when present in the result.
        for optional_key in ("newTabId", "timing"):
            if optional_key in click_result:
                response[optional_key] = click_result[optional_key]

        return response
    except Exception as e:
        logger.error(f"Failed to click element: {e}")
        return dict(
            result=f"Error clicking element: {e}",
            snapshot="",
            tabs=[],
            current_tab=0,
            total_tabs=0,
        )
|
|
724
|
+
|
|
725
|
+
async def browser_type(
    self,
    *,
    ref: Optional[str] = None,
    text: Optional[str] = None,
    inputs: Optional[List[Dict[str, str]]] = None,
) -> Dict[str, Any]:
    r"""Types text into one or more input elements on the page.

    This method supports two modes:
    1. Single input mode (backward compatible): Provide 'ref' and 'text'
    2. Multiple inputs mode: Provide 'inputs' as a list of dictionaries
       with 'ref' and 'text' keys

    Single input mode takes precedence when both 'ref' and 'text' are
    given.

    Args:
        ref (Optional[str]): The `ref` ID of the input element, from a
            snapshot. Required when using single input mode.
        text (Optional[str]): The text to type into the element. Required
            when using single input mode.
        inputs (Optional[List[Dict[str, str]]]): List of dictionaries,
            each containing 'ref' and 'text' keys for typing into multiple
            elements. Example: [{'ref': '1', 'text': 'username'},
            {'ref': '2', 'text': 'password'}]

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message if typing failed or the arguments were incomplete.
            - "snapshot" (str): A textual snapshot of the page after
              typing (empty on error).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
            - "details" (Dict[str, Any]): When using multiple inputs,
              contains success/error status for each ref.
    """
    try:
        wrapper = await self._get_ws_wrapper()

        if ref is None or text is None:
            if inputs is None:
                raise ValueError(
                    "Either provide 'ref' and 'text' for single input, "
                    "or 'inputs' for multiple inputs"
                )
            outcome = await wrapper.type_multiple(inputs)
        else:
            outcome = await wrapper.type(ref, text)

        open_tabs = await wrapper.get_tab_info()

        # Index of the first tab flagged as current; 0 when none is.
        active_index = 0
        for position, tab in enumerate(open_tabs):
            if tab.get("is_current"):
                active_index = position
                break

        outcome.update(
            tabs=open_tabs,
            current_tab=active_index,
            total_tabs=len(open_tabs),
        )
        return outcome
    except Exception as e:
        logger.error(f"Failed to type text: {e}")
        return dict(
            result=f"Error typing text: {e}",
            snapshot="",
            tabs=[],
            current_tab=0,
            total_tabs=0,
        )
|
|
799
|
+
|
|
800
|
+
async def browser_select(self, *, ref: str, value: str) -> Dict[str, Any]:
    r"""Selects an option in a dropdown (`<select>`) element.

    Args:
        ref (str): The `ref` ID of the `<select>` element.
        value (str): The `value` attribute of the `<option>` to select,
            not its visible text.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message if the selection failed.
            - "snapshot" (str): A snapshot of the page after the
              selection (empty on error).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.select(ref, value)
        open_tabs = await wrapper.get_tab_info()

        # Index of the first tab flagged as current; 0 when none is.
        active_index = 0
        for position, tab in enumerate(open_tabs):
            if tab.get("is_current"):
                active_index = position
                break

        outcome.update(
            tabs=open_tabs,
            current_tab=active_index,
            total_tabs=len(open_tabs),
        )
        return outcome
    except Exception as e:
        logger.error(f"Failed to select option: {e}")
        return dict(
            result=f"Error selecting option: {e}",
            snapshot="",
            tabs=[],
            current_tab=0,
            total_tabs=0,
        )
|
|
847
|
+
|
|
848
|
+
async def browser_scroll(
    self, *, direction: str, amount: int = 500
) -> Dict[str, Any]:
    r"""Scrolls the current page window.

    Args:
        direction (str): The direction to scroll: 'up' or 'down'.
        amount (int): The number of pixels to scroll.
            (default: :obj:`500`)

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message if scrolling failed.
            - "snapshot" (str): A snapshot of the page after scrolling
              (empty on error).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.scroll(direction, amount)
        open_tabs = await wrapper.get_tab_info()

        # Index of the first tab flagged as current; 0 when none is.
        active_index = 0
        for position, tab in enumerate(open_tabs):
            if tab.get("is_current"):
                active_index = position
                break

        outcome.update(
            tabs=open_tabs,
            current_tab=active_index,
            total_tabs=len(open_tabs),
        )
        return outcome
    except Exception as e:
        logger.error(f"Failed to scroll: {e}")
        return dict(
            result=f"Error scrolling: {e}",
            snapshot="",
            tabs=[],
            current_tab=0,
            total_tabs=0,
        )
|
|
895
|
+
|
|
896
|
+
async def browser_enter(self) -> Dict[str, Any]:
    r"""Simulates pressing the Enter key on the currently focused
    element.

    Useful for submitting forms or search queries after using the `type`
    tool.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message if the key press failed.
            - "snapshot" (str): A new page snapshot, as this action often
              triggers navigation (empty on error).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.enter()
        open_tabs = await wrapper.get_tab_info()

        # Index of the first tab flagged as current; 0 when none is.
        active_index = 0
        for position, tab in enumerate(open_tabs):
            if tab.get("is_current"):
                active_index = position
                break

        outcome.update(
            tabs=open_tabs,
            current_tab=active_index,
            total_tabs=len(open_tabs),
        )
        return outcome
    except Exception as e:
        logger.error(f"Failed to press enter: {e}")
        return dict(
            result=f"Error pressing enter: {e}",
            snapshot="",
            tabs=[],
            current_tab=0,
            total_tabs=0,
        )
|
|
942
|
+
|
|
943
|
+
async def browser_mouse_control(
    self, *, control: str, x: float, y: float
) -> Dict[str, Any]:
    r"""Controls the mouse to interact with the browser at the given
    x, y coordinates.

    Args:
        control (str): The action to perform: 'click', 'right_click'
            or 'dblclick'.
        x (float): x-coordinate for the control action.
        y (float): y-coordinate for the control action.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message if the mouse action failed.
            - "snapshot" (str): A snapshot of the page after mouse
              control action (empty on error).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.mouse_control(control, x, y)
        open_tabs = await wrapper.get_tab_info()

        # Index of the first tab flagged as current; 0 when none is.
        active_index = 0
        for position, tab in enumerate(open_tabs):
            if tab.get("is_current"):
                active_index = position
                break

        outcome.update(
            tabs=open_tabs,
            current_tab=active_index,
            total_tabs=len(open_tabs),
        )
        return outcome
    except Exception as e:
        logger.error(f"Failed to control mouse: {e}")
        return dict(
            result=f"Error with mouse control: {e}",
            snapshot="",
            tabs=[],
            current_tab=0,
            total_tabs=0,
        )
|
|
993
|
+
|
|
994
|
+
async def browser_mouse_drag(
    self, *, from_ref: str, to_ref: str
) -> Dict[str, Any]:
    r"""Controls the mouse to drag and drop in the browser using ref IDs.

    Args:
        from_ref (str): The `ref` ID of the source element to drag from.
        to_ref (str): The `ref` ID of the target element to drag to.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message if the drag failed.
            - "snapshot" (str): A new page snapshot (empty on error).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.mouse_drag(from_ref, to_ref)
        open_tabs = await wrapper.get_tab_info()

        # Index of the first tab flagged as current; 0 when none is.
        active_index = 0
        for position, tab in enumerate(open_tabs):
            if tab.get("is_current"):
                active_index = position
                break

        outcome.update(
            tabs=open_tabs,
            current_tab=active_index,
            total_tabs=len(open_tabs),
        )
        return outcome
    except Exception as e:
        logger.error(f"Error with mouse drag and drop: {e}")
        return dict(
            result=f"Error with mouse drag and drop: {e}",
            snapshot="",
            tabs=[],
            current_tab=0,
            total_tabs=0,
        )
|
|
1041
|
+
|
|
1042
|
+
async def browser_press_key(self, *, keys: List[str]) -> Dict[str, Any]:
    r"""Press key and key combinations.

    Supports single key press or combination of keys by concatenating
    them with '+' separator.

    Args:
        keys (List[str]): key or list of keys.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message if the key press failed.
            - "snapshot" (str): A snapshot of the page after
              press key action (empty on error).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.press_key(keys)
        open_tabs = await wrapper.get_tab_info()

        # Index of the first tab flagged as current; 0 when none is.
        active_index = 0
        for position, tab in enumerate(open_tabs):
            if tab.get("is_current"):
                active_index = position
                break

        outcome.update(
            tabs=open_tabs,
            current_tab=active_index,
            total_tabs=len(open_tabs),
        )
        return outcome
    except Exception as e:
        logger.error(f"Failed to press key: {e}")
        return dict(
            result=f"Error with press key: {e}",
            snapshot="",
            tabs=[],
            current_tab=0,
            total_tabs=0,
        )
|
|
1089
|
+
|
|
1090
|
+
async def browser_switch_tab(self, *, tab_id: str) -> Dict[str, Any]:
    r"""Switches to a different browser tab using its ID.

    After switching, all actions will apply to the new tab. Use
    `get_tab_info` to find the ID of the tab you want to switch to.

    Args:
        tab_id (str): The ID of the tab to activate.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message if the switch failed.
            - "snapshot" (str): A snapshot of the newly active tab
              (empty on error).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the new active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.switch_tab(tab_id)
        open_tabs = await wrapper.get_tab_info()

        # Index of the first tab flagged as current; 0 when none is.
        active_index = 0
        for position, tab in enumerate(open_tabs):
            if tab.get("is_current"):
                active_index = position
                break

        outcome.update(
            tabs=open_tabs,
            current_tab=active_index,
            total_tabs=len(open_tabs),
        )
        return outcome
    except Exception as e:
        logger.error(f"Failed to switch tab: {e}")
        return dict(
            result=f"Error switching tab: {e}",
            snapshot="",
            tabs=[],
            current_tab=0,
            total_tabs=0,
        )
|
|
1137
|
+
|
|
1138
|
+
async def browser_close_tab(self, *, tab_id: str) -> Dict[str, Any]:
    r"""Close the browser tab identified by ``tab_id``.

    Use `get_tab_info` to find the ID of the tab to close. The tab list
    is queried again after the close, so the returned tab fields describe
    the tabs that remain.

    Args:
        tab_id (str): The ID of the tab to close.

    Returns:
        Dict[str, Any]: The dictionary returned by the WebSocket
            wrapper's ``close_tab`` call (expected to carry "result" and
            "snapshot"), extended with:
            - "tabs" (List[Dict]): Information about remaining tabs.
            - "current_tab" (int): Index of the first tab flagged
              ``is_current`` (0 when no tab is flagged).
            - "total_tabs" (int): Total number of remaining tabs.
            If anything fails, "result" holds the error text, "snapshot"
            is empty and the tab fields fall back to empty defaults.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.close_tab(tab_id)
        remaining = await wrapper.get_tab_info()

        # Indices of tabs flagged as current; only the first one is used.
        flagged = (
            position
            for position, entry in enumerate(remaining)
            if entry.get("is_current")
        )
        outcome["tabs"] = remaining
        outcome["current_tab"] = next(flagged, 0)
        outcome["total_tabs"] = len(remaining)
        return outcome
    except Exception as e:
        logger.error(f"Failed to close tab: {e}")
        return dict(
            result=f"Error closing tab: {e}",
            snapshot="",
            tabs=[],
            current_tab=0,
            total_tabs=0,
        )
|
|
1186
|
+
|
|
1187
|
+
async def browser_get_tab_info(self) -> Dict[str, Any]:
    r"""Return information about every open browser tab.

    The tab list comes straight from the WebSocket wrapper's
    ``get_tab_info`` call; each entry is expected to be a dictionary whose
    ``is_current`` flag marks the active tab.

    Returns:
        Dict[str, Any]: A dictionary with tab information:
            - "tabs" (List[Dict]): The list of open tabs as reported by
              the wrapper.
            - "current_tab" (int): Index of the first tab flagged
              ``is_current`` (0 when no tab is flagged).
            - "total_tabs" (int): Total number of open tabs.
            On failure the list is empty and both counters are 0.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        open_tabs = await wrapper.get_tab_info()

        flagged = (
            position
            for position, entry in enumerate(open_tabs)
            if entry.get("is_current")
        )
        summary: Dict[str, Any] = {"tabs": open_tabs}
        summary["current_tab"] = next(flagged, 0)
        summary["total_tabs"] = len(open_tabs)
        return summary
    except Exception as e:
        logger.error(f"Failed to get tab info: {e}")
        return dict(tabs=[], current_tab=0, total_tabs=0)
|
|
1226
|
+
|
|
1227
|
+
async def browser_console_view(self) -> Dict[str, Any]:
    r"""Fetch the console log messages of the current page.

    Returns:
        Dict[str, Any]: A dictionary with a single key:
            - "console_messages" (List[Dict]): Messages returned by the
              WebSocket wrapper's ``console_view`` call, or an empty list
              when the lookup fails.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        messages = await wrapper.console_view()
    except Exception as e:
        logger.error(f"Failed to get console view: {e}")
        return {"console_messages": []}
    return {"console_messages": messages}
|
|
1244
|
+
|
|
1245
|
+
async def browser_console_exec(self, code: str) -> Dict[str, Any]:
    r"""Run JavaScript in the console of the current page and return the
    outcome.

    Args:
        code (str): JavaScript code to execute in the browser console.

    Returns:
        Dict[str, Any]: The dictionary returned by the WebSocket
            wrapper's ``console_exec`` call (expected to carry "result"
            and "snapshot"), extended with:
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the first tab flagged
              ``is_current`` (0 when no tab is flagged).
            - "total_tabs" (int): Total number of open tabs.
            If anything fails, "result" holds the error text, "snapshot"
            is empty and the tab fields fall back to empty defaults.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.console_exec(code)
        open_tabs = await wrapper.get_tab_info()

        # The tab list is re-read after execution and merged into the
        # wrapper's response.
        active_index = next(
            (
                position
                for position, entry in enumerate(open_tabs)
                if entry.get("is_current")
            ),
            0,
        )
        tab_fields = {
            "tabs": open_tabs,
            "current_tab": active_index,
            "total_tabs": len(open_tabs),
        }
        outcome.update(tab_fields)
        return outcome
    except Exception as e:
        logger.error(f"Failed to execute javascript in console: {e}")
        return dict(
            result=f"Error in code execution: {e}",
            snapshot="",
            tabs=[],
            current_tab=0,
            total_tabs=0,
        )
|
|
1291
|
+
|
|
1292
|
+
@high_level_action
async def browser_sheet_input(
    self, *, cells: List[SheetCell]
) -> Dict[str, Any]:
    r"""Input text into multiple cells in a spreadsheet (e.g., Google
    Sheets).

    Each cell is normalized first (column labels such as "A" or "AB" are
    converted to 0-based indices), the whole batch is typed through
    ``_sheet_input_batch_js`` and the sheet is then read back with
    ``browser_sheet_read``.

    Args:
        cells (List[Dict[str, Any]]): List of cells to input, each
            containing:
            - "row" (int): Row index (0-based). Row 0 = first row,
              Row 1 = second row, etc.
            - "col" (int): Column index (0-based). Col 0 = Column A,
              Col 1 = Column B, etc. A column label ("A", "AB") or a
              numeric string ("2") is also accepted.
            - "text" (str): Text to input into the cell

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action with details,
              or an error message (for example when a cell has an
              invalid column label or a negative index).
            - "content" (str): The updated spreadsheet content (auto-read
              after input); empty when the read-back fails.
            - "snapshot" (str): Always empty string (sheet tools don't
              return snapshots).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.

    Example:
        >>> cells = [
        ...     {"row": 0, "col": 0, "text": "Name"},
        ...     {"row": 0, "col": 1, "text": "Age"},
        ...     {"row": 1, "col": 0, "text": "Alice"},
        ...     {"row": 1, "col": 1, "text": "30"},
        ... ]
    """

    def _column_index(col: Any) -> int:
        # Convert a column given as int, numeric string or spreadsheet
        # label (A->0, ..., Z->25, AA->26, AB->27, ...) to a 0-based index.
        if not isinstance(col, str):
            return int(col)
        label = col.strip().upper()
        if label.isdigit():
            return int(label)
        if not label or not (label.isascii() and label.isalpha()):
            # Anything else would silently turn into a bogus (often
            # negative) index through the base-26 arithmetic below.
            raise ValueError(f"Invalid column label: {col!r}")
        index = 0
        for char in label:
            index = index * 26 + (ord(char) - ord('A') + 1)
        return index - 1

    try:
        import platform

        ws_wrapper = await self._get_ws_wrapper()
        system = platform.system()

        # Normalize cells: convert column labels to indices if needed
        normalized_cells = []
        for cell in cells:
            normalized_cell = cell.copy()
            normalized_cell["col"] = _column_index(cell.get("col", 0))
            # Row is always used as-is (should be 0-based integer)
            normalized_cell["row"] = int(cell.get("row", 0))
            normalized_cell["text"] = str(cell.get("text", ""))

            # The batch input navigates relative to a tracked cursor
            # position; a negative target cannot be reached and would
            # desynchronize every cell typed after it.
            if normalized_cell["row"] < 0 or normalized_cell["col"] < 0:
                raise ValueError(
                    "Cell row/col must be non-negative, got "
                    f"row={normalized_cell['row']}, "
                    f"col={normalized_cell['col']}"
                )
            normalized_cells.append(normalized_cell)

        # Perform batch input
        input_result = await self._sheet_input_batch_js(
            normalized_cells, ws_wrapper, system
        )

        # Read sheet content after input
        try:
            read_result = await self.browser_sheet_read()
            return {
                "result": input_result["result"],
                "content": read_result.get("content", ""),
                "snapshot": "",
                "tabs": input_result.get("tabs", []),
                "current_tab": input_result.get("current_tab", 0),
                "total_tabs": input_result.get("total_tabs", 0),
            }
        except Exception as read_error:
            logger.warning(f"Failed to auto-read sheet: {read_error}")
            input_result["snapshot"] = ""
            # Keep the documented return shape even without a read-back.
            input_result.setdefault("content", "")
            return input_result

    except Exception as e:
        logger.error(f"Failed to input to sheet: {e}")
        return {
            "result": f"Error inputting to sheet: {e}",
            "content": "",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
|
|
1386
|
+
|
|
1387
|
+
async def _sheet_input_batch_js(
    self,
    cells: List[SheetCell],
    ws_wrapper: Any,
    system: str,
) -> Dict[str, Any]:
    r"""Type a batch of cells into the sheet using keyboard navigation.

    All key presses, waits and text inputs are collected into one list of
    operations and sent to the WebSocket wrapper in a single
    ``batch_keyboard_input`` command with ``skipStabilityWait`` enabled.
    The cursor is first sent to the top-left cell and every following
    cell is reached by relative arrow-key movement from the tracked
    cursor position.

    Args:
        cells (List[SheetCell]): Cells to write; "row", "col" and "text"
            are read from each entry (defaulting to 0, 0 and "").
        ws_wrapper (Any): Wrapper used to send the command and to query
            the tab list afterwards.
        system (str): Platform name; "Darwin" selects the Meta modifier,
            anything else selects Control.

    Returns:
        Dict[str, Any]: "result", an empty "snapshot" and the tab fields
            ("tabs", "current_tab", "total_tabs"). If sending the batch
            or reading the tabs fails, "result" carries the error text
            and the tab fields are empty defaults.
    """

    def press(*keys: str) -> Dict[str, Any]:
        return {"type": "press", "keys": list(keys)}

    def pause(delay: int) -> Dict[str, Any]:
        return {"type": "wait", "delay": delay}

    # Jump to A1 so that relative navigation starts from a known position.
    home_modifier = "Meta" if system == "Darwin" else "Control"
    queue: List[Dict[str, Any]] = [press(home_modifier, "Home"), pause(310)]

    cursor_row, cursor_col = 0, 0
    for entry in cells:
        dest_row = entry.get("row", 0)
        dest_col = entry.get("col", 0)
        payload = entry.get("text", "")

        d_row = dest_row - cursor_row
        d_col = dest_col - cursor_col

        # Move vertically first, then horizontally, one key press per step.
        for delta, forward_key, backward_key in (
            (d_row, "ArrowDown", "ArrowUp"),
            (d_col, "ArrowRight", "ArrowLeft"),
        ):
            if delta > 0:
                arrow = forward_key
            elif delta < 0:
                arrow = backward_key
            else:
                continue
            for _ in range(abs(delta)):
                queue.append(press(arrow))
                queue.append(pause(50))

        # Extra settle time whenever the cursor had to move.
        if d_row != 0 or d_col != 0:
            queue.append(pause(100))

        # Clear the cell, then type the new content (if any).
        queue.append(press("Delete"))
        queue.append(pause(120))
        if payload:
            queue.append({"type": "type", "text": payload, "delay": 0})
            queue.append(pause(120))

        # Confirm with Enter.
        queue.append(press("Enter"))
        queue.append(pause(130))

        # After Enter the cursor is tracked as one row below the cell.
        cursor_row, cursor_col = dest_row + 1, dest_col

    try:
        await ws_wrapper._send_command(
            'batch_keyboard_input',
            {'operations': queue, 'skipStabilityWait': True},
        )
        open_tabs = await ws_wrapper.get_tab_info()
        active_index = next(
            (
                position
                for position, tab_entry in enumerate(open_tabs)
                if tab_entry.get("is_current")
            ),
            0,
        )
        return {
            "result": f"Successfully input to {len(cells)} cells",
            "snapshot": "",
            "tabs": open_tabs,
            "current_tab": active_index,
            "total_tabs": len(open_tabs),
        }
    except Exception as e:
        logger.error(f"Batch keyboard execution failed: {e}")
        return {
            "result": f"Error in batch keyboard execution: {e}",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
|
|
1494
|
+
|
|
1495
|
+
def _trim_sheet_content(self, content: str) -> str:
|
|
1496
|
+
"""Trim sheet content and add row/column labels.
|
|
1497
|
+
|
|
1498
|
+
Remove all empty rows and columns, then add:
|
|
1499
|
+
- Column headers: A, B, C, D...
|
|
1500
|
+
- Row numbers: 0, 1, 2, 3...
|
|
1501
|
+
|
|
1502
|
+
Args:
|
|
1503
|
+
content (str): Raw sheet content with tabs and newlines.
|
|
1504
|
+
|
|
1505
|
+
Returns:
|
|
1506
|
+
str: Trimmed content with row/column labels.
|
|
1507
|
+
"""
|
|
1508
|
+
if not content or not content.strip():
|
|
1509
|
+
return ""
|
|
1510
|
+
|
|
1511
|
+
# Split into rows and parse into 2D array
|
|
1512
|
+
rows = content.split('\n')
|
|
1513
|
+
grid: List[List[str]] = []
|
|
1514
|
+
max_cols = 0
|
|
1515
|
+
for row_str in rows:
|
|
1516
|
+
cells = row_str.split('\t')
|
|
1517
|
+
grid.append(cells)
|
|
1518
|
+
max_cols = max(max_cols, len(cells))
|
|
1519
|
+
|
|
1520
|
+
# Pad rows to same length
|
|
1521
|
+
for row_list in grid:
|
|
1522
|
+
while len(row_list) < max_cols:
|
|
1523
|
+
row_list.append('')
|
|
1524
|
+
|
|
1525
|
+
if not grid:
|
|
1526
|
+
return ""
|
|
1527
|
+
|
|
1528
|
+
# Find non-empty rows and columns (keep original indices)
|
|
1529
|
+
non_empty_rows = []
|
|
1530
|
+
for i, row_cells in enumerate(grid):
|
|
1531
|
+
if any(cell.strip() for cell in row_cells):
|
|
1532
|
+
non_empty_rows.append(i)
|
|
1533
|
+
|
|
1534
|
+
non_empty_cols = []
|
|
1535
|
+
for j in range(max_cols):
|
|
1536
|
+
if any(grid[i][j].strip() for i in range(len(grid))):
|
|
1537
|
+
non_empty_cols.append(j)
|
|
1538
|
+
|
|
1539
|
+
# If no content found
|
|
1540
|
+
if not non_empty_rows or not non_empty_cols:
|
|
1541
|
+
return ""
|
|
1542
|
+
|
|
1543
|
+
# Extract non-empty rows and columns
|
|
1544
|
+
filtered_grid = []
|
|
1545
|
+
for i in non_empty_rows:
|
|
1546
|
+
filtered_row = [grid[i][j] for j in non_empty_cols]
|
|
1547
|
+
filtered_grid.append(filtered_row)
|
|
1548
|
+
|
|
1549
|
+
# Generate column labels using original column indices
|
|
1550
|
+
def col_label(index):
|
|
1551
|
+
label = ""
|
|
1552
|
+
while True:
|
|
1553
|
+
label = chr(65 + (index % 26)) + label
|
|
1554
|
+
index = index // 26
|
|
1555
|
+
if index == 0:
|
|
1556
|
+
break
|
|
1557
|
+
index -= 1
|
|
1558
|
+
return label
|
|
1559
|
+
|
|
1560
|
+
col_headers = [col_label(j) for j in non_empty_cols]
|
|
1561
|
+
|
|
1562
|
+
# Add column headers as first row
|
|
1563
|
+
result_rows = ['\t'.join(['', *col_headers])]
|
|
1564
|
+
|
|
1565
|
+
# Add data rows with original row numbers (0-based)
|
|
1566
|
+
for row_idx, row_data in zip(non_empty_rows, filtered_grid):
|
|
1567
|
+
result_rows.append('\t'.join([str(row_idx), *row_data]))
|
|
1568
|
+
|
|
1569
|
+
return '\n'.join(result_rows)
|
|
1570
|
+
|
|
1571
|
+
@high_level_action
async def browser_sheet_read(self) -> Dict[str, Any]:
    r"""Read content from a spreadsheet.

    A temporary ``copy`` event listener is injected into the page, the
    whole sheet is selected and copied twice through keyboard shortcuts,
    and the text captured by the listener is collected. When more than
    one capture exists, the one among the first two with more non-empty
    cells is used (the second wins ties). The text is then passed through
    ``_trim_sheet_content`` and the injected listener is removed again.

    Output format:
    - First row: Column labels (A, B, C, ..., Z, AA, AB, ...)
    - First column: Row numbers (0, 1, 2, 3, ...) - 0-based
    - Labels show ORIGINAL positions in the spreadsheet (before removing
      empty rows/columns)

    Row/column indices match browser_sheet_input directly:
    - Row label "0" in output = row index 0 in browser_sheet_input
    - Column label "A" in output = col index 0 in browser_sheet_input
    - Column label "C" in output = col index 2 in browser_sheet_input

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation or error message.
            - "content" (str): Tab-separated spreadsheet content with
              row/column labels. Format:
              Line 1: "\tA\tB\tC" (column headers)
              Line 2+: "0\tdata1\tdata2\tdata3" (row number + data)
            - "snapshot" (str): Always empty string (sheet tools don't
              return snapshots).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    import asyncio
    import json
    import platform
    import uuid

    ws_wrapper = await self._get_ws_wrapper()

    # Use unique ID to avoid conflicts in parallel execution
    request_id = str(uuid.uuid4())
    var_name = f"__sheetCopy_{request_id.replace('-', '_')}"

    def _decode_captures(raw: Any) -> Any:
        # The console result is either already a list or a string that
        # may carry a textual prefix in front of a JSON payload.
        if isinstance(raw, list):
            return raw
        if not isinstance(raw, str):
            return []
        prefix = "Console execution result: "
        if raw.startswith(prefix):
            raw = raw[len(prefix) :]
        raw = raw.strip()
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            return []

    def _filled_cell_count(text: Any) -> int:
        if not text:
            return 0
        return sum(
            1
            for line in text.split('\n')
            for cell in line.split('\t')
            if cell.strip()
        )

    try:
        # Step 1: Setup copy interception with multiple captures
        js_inject = f"""
        window.{var_name} = [];
        let copyCount = 0;
        const copyListener = function(e) {{
            try {{
                // Intercept clipboard data before system clipboard write
                // Capture from Google Sheets' setData call
                const originalSetData = e.clipboardData.setData.bind(
                    e.clipboardData
                );
                let capturedText = '';

                e.clipboardData.setData = function(type, data) {{
                    if (type === 'text/plain') {{
                        capturedText = data;
                    }}
                    // Prevent system clipboard write
                }};

                // Let Google Sheets process event (calls setData)
                // Event propagates and Sheets tries to set clipboard
                setTimeout(() => {{
                    copyCount++;
                    window.{var_name}.push(capturedText);
                }}, 0);

                // Prevent the default browser copy behavior
                e.preventDefault();
            }} catch (err) {{
                console.error(
                    '[SheetRead] Failed to intercept copy data:', err
                );
            }}
        }};

        document.addEventListener('copy', copyListener, true);
        window.{var_name}_removeListener = () => {{
            document.removeEventListener('copy', copyListener, true);
        }};

        'Copy listener installed';
        """
        await ws_wrapper.console_exec(js_inject)

        # Step 2: select-all + copy, issued twice; only the modifier key
        # depends on the platform.
        modifier = "Meta" if platform.system() == "Darwin" else "Control"
        select_all_copy_ops: List[Dict[str, Any]] = [
            {"type": "press", "keys": [modifier, "a"]},
            {"type": "wait", "delay": 100},
            {"type": "press", "keys": [modifier, "c"]},
        ]
        for _attempt in range(2):
            await ws_wrapper._send_command(
                'batch_keyboard_input',
                {
                    'operations': select_all_copy_ops,
                    'skipStabilityWait': True,
                },
            )
            await asyncio.sleep(0.2)

        # Step 3: fetch whatever the listener captured
        js_check = f"window.{var_name} || []"
        content_result = await ws_wrapper.console_exec(js_check)
        captured_contents = _decode_captures(
            content_result.get("result", "[]")
        )

        # Step 4: choose the capture to use
        if not captured_contents:
            sheet_content = ""
        elif len(captured_contents) == 1:
            sheet_content = captured_contents[0]
        else:
            first_count, second_count = (
                _filled_cell_count(candidate)
                for candidate in captured_contents[:2]
            )
            best_idx = 0 if first_count > second_count else 1
            sheet_content = captured_contents[best_idx]

        sheet_content = self._trim_sheet_content(sheet_content)

        open_tabs = await ws_wrapper.get_tab_info()
        active_index = next(
            (
                position
                for position, entry in enumerate(open_tabs)
                if entry.get("is_current")
            ),
            0,
        )
        return {
            "result": "Successfully read spreadsheet content",
            "content": sheet_content,
            "snapshot": "",  # Sheet tools don't return snapshots
            "tabs": open_tabs,
            "current_tab": active_index,
            "total_tabs": len(open_tabs),
        }

    except Exception as e:
        logger.error(f"Failed to read sheet: {e}")
        return {
            "result": f"Error reading sheet: {e}",
            "content": "",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
    finally:
        # Always remove the injected listener and its globals; failures
        # during cleanup are deliberately ignored.
        js_cleanup = f"""
        if (window.{var_name}_removeListener) {{
            window.{var_name}_removeListener();
        }}
        delete window.{var_name};
        delete window.{var_name}_removeListener;
        'cleaned'
        """
        with contextlib.suppress(Exception):
            await ws_wrapper.console_exec(js_cleanup)
|
|
1802
|
+
|
|
1803
|
+
# Additional methods for backward compatibility
|
|
1804
|
+
async def browser_wait_user(
    self, timeout_sec: Optional[float] = None
) -> Dict[str, Any]:
    r"""Pauses execution and waits for human input from the console.

    Use this for tasks requiring manual steps, like solving a CAPTCHA.
    The agent will resume after the user presses Enter in the console,
    or once ``timeout_sec`` has elapsed.

    Args:
        timeout_sec (Optional[float]): Max time to wait in seconds. If
            `None`, it will wait indefinitely.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): A message indicating how the wait ended
              ("User resumed." or a timeout notice).
            - "snapshot" (str): The page snapshot after the wait (empty
              if it could not be taken).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    import asyncio

    prompt = (
        "🕑 Agent waiting for human input. "
        "Complete action in browser, then press Enter..."
    )
    logger.info(f"\n{prompt}\n")

    async def _await_enter() -> None:
        try:
            await asyncio.to_thread(
                input, ">>> Press Enter to resume <<<\n"
            )
        except Exception as e:
            # Best effort: a closed or unreadable stdin should not crash
            # the tool. asyncio.CancelledError is deliberately NOT caught
            # here: swallowing it lets a cancelled wait finish "normally",
            # which makes a timeout indistinguishable from a real Enter.
            logger.warning(f"Could not read user input: {e}")

    start_time = time.time()
    try:
        if timeout_sec is not None:
            logger.info(
                f"Waiting for user input with timeout: {timeout_sec}s"
            )
            # wait_for cancels the wrapped coroutine on timeout and waits
            # for that cancellation before raising TimeoutError. The
            # worker thread blocked in input() cannot be interrupted and
            # keeps running until Enter is pressed.
            await asyncio.wait_for(_await_enter(), timeout=timeout_sec)
        else:
            logger.info("Waiting for user input (no timeout)")
            await _await_enter()
        wait_time = time.time() - start_time
        logger.info(f"User input received after {wait_time:.2f}s")
        result_msg = "User resumed."
    except asyncio.TimeoutError:
        wait_time = timeout_sec or 0.0
        logger.info(
            f"User input timeout reached after {wait_time}s, "
            f"auto-resuming"
        )
        result_msg = f"Timeout {timeout_sec}s reached, auto-resumed."

    try:
        snapshot = await self.browser_get_page_snapshot()
        tab_info = await self.browser_get_tab_info()
        return {"result": result_msg, "snapshot": snapshot, **tab_info}
    except Exception as e:
        logger.warning(f"Failed to get snapshot after wait: {e}")
        return {
            "result": result_msg,
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
|
|
1890
|
+
|
|
1891
|
+
def clone_for_new_session(
    self, new_session_id: Optional[str] = None
) -> "HybridBrowserToolkit":
    r"""Build another HybridBrowserToolkit that uses its own session.

    The clone receives the settings stored on this instance (headless
    mode, user data dir, stealth, enabled tools, logging, start URL, the
    timeout values, viewport limit and visual mode). Its cache directory
    is this instance's cache directory with ``_clone_<session id>/``
    appended.

    Args:
        new_session_id: Optional new session ID. If None, the first
            eight characters of a random UUID are used.

    Returns:
        A new HybridBrowserToolkit instance configured from this one but
        bound to the new session ID.
    """
    import uuid

    session = new_session_id
    if session is None:
        session = str(uuid.uuid4())[:8]

    cache_root = self._cache_dir.rstrip('/')
    settings: Dict[str, Any] = {
        "headless": self._headless,
        "user_data_dir": self._user_data_dir,
        "stealth": self._stealth,
        "cache_dir": f"{cache_root}_clone_{session}/",
        # Copy so the clone cannot mutate this instance's tool list.
        "enabled_tools": self.enabled_tools.copy(),
        "browser_log_to_file": self._browser_log_to_file,
        "session_id": session,
        "default_start_url": self._default_start_url,
        "default_timeout": self._default_timeout,
        "short_timeout": self._short_timeout,
        "navigation_timeout": self._navigation_timeout,
        "network_idle_timeout": self._network_idle_timeout,
        "screenshot_timeout": self._screenshot_timeout,
        "page_stability_timeout": self._page_stability_timeout,
        "dom_content_loaded_timeout": self._dom_content_loaded_timeout,
        "viewport_limit": self._viewport_limit,
        "full_visual_mode": self._full_visual_mode,
    }
    return HybridBrowserToolkit(**settings)
|
|
1930
|
+
|
|
1931
|
+
def get_tools(self) -> List[FunctionTool]:
    r"""Wrap the methods named in ``self.enabled_tools`` as function tools.

    Names that do not match a known browser tool are skipped with a
    warning.

    Returns:
        List[FunctionTool]: One FunctionTool per recognised name, in the
            order the names appear in ``self.enabled_tools``.
    """
    # Every supported tool is exposed under the same name as the method
    # that implements it, so the lookup table is built from the names.
    known_tool_names = (
        "browser_open",
        "browser_close",
        "browser_visit_page",
        "browser_back",
        "browser_forward",
        "browser_get_page_snapshot",
        "browser_get_som_screenshot",
        "browser_click",
        "browser_type",
        "browser_select",
        "browser_scroll",
        "browser_enter",
        "browser_mouse_control",
        "browser_mouse_drag",
        "browser_press_key",
        "browser_wait_user",
        "browser_switch_tab",
        "browser_close_tab",
        "browser_get_tab_info",
        "browser_console_view",
        "browser_console_exec",
        "browser_sheet_input",
        "browser_sheet_read",
    )
    methods_by_name = {
        name: getattr(self, name) for name in known_tool_names
    }

    enabled_tools = []

    for tool_name in self.enabled_tools:
        method = methods_by_name.get(tool_name)
        if method is None:
            logger.warning(f"Unknown tool name: {tool_name}")
            continue
        enabled_tools.append(
            FunctionTool(cast(Callable[..., Any], method))
        )

    logger.info(f"Returning {len(enabled_tools)} enabled tools")
    return enabled_tools
|