auto-coder 0.1.400__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic. Click here for more details.
- auto_coder-2.0.0.dist-info/LICENSE +158 -0
- auto_coder-2.0.0.dist-info/METADATA +558 -0
- auto_coder-2.0.0.dist-info/RECORD +795 -0
- {auto_coder-0.1.400.dist-info → auto_coder-2.0.0.dist-info}/WHEEL +1 -1
- {auto_coder-0.1.400.dist-info → auto_coder-2.0.0.dist-info}/entry_points.txt +3 -3
- autocoder/__init__.py +31 -0
- autocoder/agent/auto_filegroup.py +32 -13
- autocoder/agent/auto_learn_from_commit.py +9 -1
- autocoder/agent/base_agentic/__init__.py +3 -0
- autocoder/agent/base_agentic/agent_hub.py +1 -1
- autocoder/agent/base_agentic/base_agent.py +235 -136
- autocoder/agent/base_agentic/default_tools.py +119 -118
- autocoder/agent/base_agentic/test_base_agent.py +1 -1
- autocoder/agent/base_agentic/tool_registry.py +32 -20
- autocoder/agent/base_agentic/tools/read_file_tool_resolver.py +25 -4
- autocoder/agent/base_agentic/tools/write_to_file_tool_resolver.py +24 -11
- autocoder/agent/base_agentic/types.py +42 -0
- autocoder/agent/entry_command_agent/chat.py +73 -59
- autocoder/auto_coder.py +31 -40
- autocoder/auto_coder_rag.py +11 -1084
- autocoder/auto_coder_runner.py +1029 -2310
- autocoder/auto_coder_terminal.py +26 -0
- autocoder/auto_coder_terminal_v3.py +190 -0
- autocoder/chat/conf_command.py +224 -124
- autocoder/chat/models_command.py +361 -299
- autocoder/chat/rules_command.py +79 -31
- autocoder/chat_auto_coder.py +1021 -372
- autocoder/chat_auto_coder_lang.py +23 -732
- autocoder/commands/auto_command.py +26 -9
- autocoder/commands/auto_web.py +1 -1
- autocoder/commands/tools.py +44 -44
- autocoder/common/__init__.py +150 -128
- autocoder/common/ac_style_command_parser/__init__.py +39 -2
- autocoder/common/ac_style_command_parser/config.py +422 -0
- autocoder/common/ac_style_command_parser/parser.py +292 -78
- autocoder/common/ac_style_command_parser/test_parser.py +241 -16
- autocoder/common/ac_style_command_parser/test_typed_parser.py +342 -0
- autocoder/common/ac_style_command_parser/typed_parser.py +653 -0
- autocoder/common/action_yml_file_manager.py +25 -13
- autocoder/common/agent_events/__init__.py +52 -0
- autocoder/common/agent_events/agent_event_emitter.py +193 -0
- autocoder/common/agent_events/event_factory.py +177 -0
- autocoder/common/agent_events/examples.py +307 -0
- autocoder/common/agent_events/types.py +113 -0
- autocoder/common/agent_events/utils.py +68 -0
- autocoder/common/agent_hooks/__init__.py +44 -0
- autocoder/common/agent_hooks/examples.py +582 -0
- autocoder/common/agent_hooks/hook_executor.py +217 -0
- autocoder/common/agent_hooks/hook_manager.py +288 -0
- autocoder/common/agent_hooks/types.py +133 -0
- autocoder/common/agent_hooks/utils.py +99 -0
- autocoder/common/agent_query_queue/queue_executor.py +324 -0
- autocoder/common/agent_query_queue/queue_manager.py +325 -0
- autocoder/common/agents/__init__.py +11 -0
- autocoder/common/agents/agent_manager.py +323 -0
- autocoder/common/agents/agent_parser.py +189 -0
- autocoder/common/agents/example_usage.py +344 -0
- autocoder/common/agents/integration_example.py +330 -0
- autocoder/common/agents/test_agent_parser.py +545 -0
- autocoder/common/async_utils.py +101 -0
- autocoder/common/auto_coder_lang.py +23 -972
- autocoder/common/autocoderargs_parser/__init__.py +14 -0
- autocoder/common/autocoderargs_parser/parser.py +184 -0
- autocoder/common/autocoderargs_parser/tests/__init__.py +1 -0
- autocoder/common/autocoderargs_parser/tests/test_args_parser.py +235 -0
- autocoder/common/autocoderargs_parser/tests/test_token_parser.py +195 -0
- autocoder/common/autocoderargs_parser/token_parser.py +290 -0
- autocoder/common/buildin_tokenizer.py +2 -4
- autocoder/common/code_auto_generate.py +149 -74
- autocoder/common/code_auto_generate_diff.py +163 -70
- autocoder/common/code_auto_generate_editblock.py +179 -89
- autocoder/common/code_auto_generate_strict_diff.py +167 -72
- autocoder/common/code_auto_merge_editblock.py +13 -6
- autocoder/common/code_modification_ranker.py +1 -1
- autocoder/common/command_completer.py +3 -3
- autocoder/common/command_file_manager/manager.py +183 -47
- autocoder/common/command_file_manager/test_command_file_manager.py +507 -0
- autocoder/common/command_templates.py +1 -1
- autocoder/common/conf_utils.py +2 -4
- autocoder/common/conversations/config.py +11 -3
- autocoder/common/conversations/get_conversation_manager.py +100 -2
- autocoder/common/conversations/llm_stats_models.py +264 -0
- autocoder/common/conversations/manager.py +112 -28
- autocoder/common/conversations/models.py +16 -2
- autocoder/common/conversations/storage/index_manager.py +134 -10
- autocoder/common/core_config/__init__.py +63 -0
- autocoder/common/core_config/agentic_mode_manager.py +109 -0
- autocoder/common/core_config/base_manager.py +123 -0
- autocoder/common/core_config/compatibility.py +151 -0
- autocoder/common/core_config/config_manager.py +156 -0
- autocoder/common/core_config/conversation_manager.py +31 -0
- autocoder/common/core_config/exclude_manager.py +72 -0
- autocoder/common/core_config/file_manager.py +177 -0
- autocoder/common/core_config/human_as_model_manager.py +129 -0
- autocoder/common/core_config/lib_manager.py +54 -0
- autocoder/common/core_config/main_manager.py +81 -0
- autocoder/common/core_config/mode_manager.py +126 -0
- autocoder/common/core_config/models.py +70 -0
- autocoder/common/core_config/test_memory_manager.py +1056 -0
- autocoder/common/env_manager.py +282 -0
- autocoder/common/env_manager_usage_example.py +211 -0
- autocoder/common/file_checkpoint/conversation_checkpoint.py +19 -19
- autocoder/common/file_checkpoint/manager.py +264 -48
- autocoder/common/file_checkpoint/test_backup.py +1 -18
- autocoder/common/file_checkpoint/test_manager.py +270 -1
- autocoder/common/file_checkpoint/test_store.py +1 -17
- autocoder/common/file_handler/__init__.py +23 -0
- autocoder/common/file_handler/active_context_handler.py +159 -0
- autocoder/common/file_handler/add_files_handler.py +409 -0
- autocoder/common/file_handler/chat_handler.py +180 -0
- autocoder/common/file_handler/coding_handler.py +401 -0
- autocoder/common/file_handler/commit_handler.py +200 -0
- autocoder/common/file_handler/lib_handler.py +156 -0
- autocoder/common/file_handler/list_files_handler.py +111 -0
- autocoder/common/file_handler/mcp_handler.py +268 -0
- autocoder/common/file_handler/models_handler.py +493 -0
- autocoder/common/file_handler/remove_files_handler.py +172 -0
- autocoder/common/file_monitor/test_file_monitor.py +307 -0
- autocoder/common/git_utils.py +51 -10
- autocoder/common/global_cancel.py +15 -6
- autocoder/common/ignorefiles/test_ignore_file_utils.py +1 -1
- autocoder/common/international/__init__.py +31 -0
- autocoder/common/international/demo_international.py +92 -0
- autocoder/common/international/message_manager.py +157 -0
- autocoder/common/international/messages/__init__.py +56 -0
- autocoder/common/international/messages/async_command_messages.py +507 -0
- autocoder/common/international/messages/auto_coder_messages.py +2208 -0
- autocoder/common/international/messages/chat_auto_coder_messages.py +1547 -0
- autocoder/common/international/messages/command_help_messages.py +986 -0
- autocoder/common/international/messages/conversation_command_messages.py +191 -0
- autocoder/common/international/messages/git_helper_plugin_messages.py +159 -0
- autocoder/common/international/messages/queue_command_messages.py +751 -0
- autocoder/common/international/messages/rules_command_messages.py +77 -0
- autocoder/common/international/messages/sdk_messages.py +1707 -0
- autocoder/common/international/messages/token_helper_plugin_messages.py +361 -0
- autocoder/common/international/messages/tool_display_messages.py +1212 -0
- autocoder/common/international/messages/workflow_exception_messages.py +473 -0
- autocoder/common/international/test_international.py +612 -0
- autocoder/common/linter_core/__init__.py +28 -0
- autocoder/common/linter_core/base_linter.py +61 -0
- autocoder/common/linter_core/config_loader.py +271 -0
- autocoder/common/linter_core/formatters/__init__.py +0 -0
- autocoder/common/linter_core/formatters/base_formatter.py +38 -0
- autocoder/common/linter_core/formatters/raw_formatter.py +17 -0
- autocoder/common/linter_core/linter.py +166 -0
- autocoder/common/linter_core/linter_factory.py +216 -0
- autocoder/common/linter_core/linter_manager.py +333 -0
- autocoder/common/linter_core/linters/__init__.py +9 -0
- autocoder/common/linter_core/linters/java_linter.py +342 -0
- autocoder/common/linter_core/linters/python_linter.py +115 -0
- autocoder/common/linter_core/linters/typescript_linter.py +119 -0
- autocoder/common/linter_core/models/__init__.py +7 -0
- autocoder/common/linter_core/models/lint_result.py +91 -0
- autocoder/common/linter_core/models.py +33 -0
- autocoder/common/linter_core/tests/__init__.py +3 -0
- autocoder/common/linter_core/tests/test_config_loader.py +323 -0
- autocoder/common/linter_core/tests/test_config_loading.py +308 -0
- autocoder/common/linter_core/tests/test_factory_manager.py +234 -0
- autocoder/common/linter_core/tests/test_formatters.py +147 -0
- autocoder/common/linter_core/tests/test_integration.py +317 -0
- autocoder/common/linter_core/tests/test_java_linter.py +496 -0
- autocoder/common/linter_core/tests/test_linters.py +265 -0
- autocoder/common/linter_core/tests/test_models.py +81 -0
- autocoder/common/linter_core/tests/verify_config_loading.py +296 -0
- autocoder/common/linter_core/tests/verify_fixes.py +183 -0
- autocoder/common/llm_friendly_package/__init__.py +31 -0
- autocoder/common/llm_friendly_package/base_manager.py +102 -0
- autocoder/common/llm_friendly_package/docs_manager.py +121 -0
- autocoder/common/llm_friendly_package/library_manager.py +171 -0
- autocoder/common/{llm_friendly_package.py → llm_friendly_package/main_manager.py} +204 -231
- autocoder/common/llm_friendly_package/models.py +40 -0
- autocoder/common/llm_friendly_package/test_llm_friendly_package.py +536 -0
- autocoder/common/llms/__init__.py +15 -0
- autocoder/common/llms/demo_error_handling.py +85 -0
- autocoder/common/llms/factory.py +142 -0
- autocoder/common/llms/manager.py +264 -0
- autocoder/common/llms/pricing.py +121 -0
- autocoder/common/llms/registry.py +288 -0
- autocoder/common/llms/schema.py +77 -0
- autocoder/common/llms/simple_demo.py +45 -0
- autocoder/common/llms/test_quick_model.py +116 -0
- autocoder/common/llms/test_remove_functionality.py +182 -0
- autocoder/common/llms/tests/__init__.py +1 -0
- autocoder/common/llms/tests/test_manager.py +330 -0
- autocoder/common/llms/tests/test_registry.py +364 -0
- autocoder/common/mcp_tools/__init__.py +62 -0
- autocoder/common/{mcp_tools.py → mcp_tools/executor.py} +49 -40
- autocoder/common/{mcp_hub.py → mcp_tools/hub.py} +42 -68
- autocoder/common/{mcp_server_install.py → mcp_tools/installer.py} +16 -28
- autocoder/common/{mcp_server.py → mcp_tools/server.py} +176 -48
- autocoder/common/mcp_tools/test_keyboard_interrupt.py +93 -0
- autocoder/common/mcp_tools/test_mcp_tools.py +391 -0
- autocoder/common/{mcp_server_types.py → mcp_tools/types.py} +121 -48
- autocoder/common/mcp_tools/verify_functionality.py +202 -0
- autocoder/common/model_speed_tester.py +32 -26
- autocoder/common/priority_directory_finder/__init__.py +142 -0
- autocoder/common/priority_directory_finder/examples.py +230 -0
- autocoder/common/priority_directory_finder/finder.py +283 -0
- autocoder/common/priority_directory_finder/models.py +236 -0
- autocoder/common/priority_directory_finder/test_priority_directory_finder.py +431 -0
- autocoder/common/project_scanner/__init__.py +18 -0
- autocoder/common/project_scanner/compat.py +77 -0
- autocoder/common/project_scanner/scanner.py +436 -0
- autocoder/common/project_tracker/__init__.py +27 -0
- autocoder/common/project_tracker/api.py +228 -0
- autocoder/common/project_tracker/demo.py +272 -0
- autocoder/common/project_tracker/tracker.py +487 -0
- autocoder/common/project_tracker/types.py +53 -0
- autocoder/common/pruner/__init__.py +67 -0
- autocoder/common/pruner/agentic_conversation_pruner.py +746 -0
- autocoder/common/{context_pruner.py → pruner/context_pruner.py} +137 -40
- autocoder/common/pruner/conversation_message_ids_api.py +386 -0
- autocoder/common/pruner/conversation_message_ids_manager.py +347 -0
- autocoder/common/pruner/conversation_message_ids_pruner.py +473 -0
- autocoder/common/pruner/conversation_normalizer.py +347 -0
- autocoder/common/{conversation_pruner.py → pruner/conversation_pruner.py} +26 -6
- autocoder/common/pruner/test_agentic_conversation_pruner.py +784 -0
- autocoder/common/pruner/test_context_pruner.py +546 -0
- autocoder/common/pruner/test_conversation_normalizer.py +502 -0
- autocoder/common/pruner/test_tool_content_detector.py +324 -0
- autocoder/common/pruner/tool_content_detector.py +227 -0
- autocoder/common/pruner/tools/__init__.py +18 -0
- autocoder/common/pruner/tools/query_message_ids.py +264 -0
- autocoder/common/pruner/tools/test_agentic_pruning_logic.py +432 -0
- autocoder/common/pruner/tools/test_message_ids_pruning_only.py +192 -0
- autocoder/common/pull_requests/__init__.py +9 -1
- autocoder/common/pull_requests/utils.py +122 -1
- autocoder/common/rag_manager/rag_manager.py +36 -40
- autocoder/common/rulefiles/__init__.py +53 -1
- autocoder/common/rulefiles/api.py +250 -0
- autocoder/common/rulefiles/core/__init__.py +14 -0
- autocoder/common/rulefiles/core/manager.py +241 -0
- autocoder/common/rulefiles/core/selector.py +805 -0
- autocoder/common/rulefiles/models/__init__.py +20 -0
- autocoder/common/rulefiles/models/index.py +16 -0
- autocoder/common/rulefiles/models/init_rule.py +18 -0
- autocoder/common/rulefiles/models/rule_file.py +18 -0
- autocoder/common/rulefiles/models/rule_relevance.py +14 -0
- autocoder/common/rulefiles/models/summary.py +16 -0
- autocoder/common/rulefiles/test_rulefiles.py +776 -0
- autocoder/common/rulefiles/utils/__init__.py +34 -0
- autocoder/common/rulefiles/utils/monitor.py +86 -0
- autocoder/common/rulefiles/utils/parser.py +230 -0
- autocoder/common/save_formatted_log.py +67 -10
- autocoder/common/search_replace.py +8 -1
- autocoder/common/search_replace_patch/__init__.py +24 -0
- autocoder/common/search_replace_patch/base.py +115 -0
- autocoder/common/search_replace_patch/manager.py +248 -0
- autocoder/common/search_replace_patch/patch_replacer.py +304 -0
- autocoder/common/search_replace_patch/similarity_replacer.py +306 -0
- autocoder/common/search_replace_patch/string_replacer.py +181 -0
- autocoder/common/search_replace_patch/tests/__init__.py +3 -0
- autocoder/common/search_replace_patch/tests/run_tests.py +126 -0
- autocoder/common/search_replace_patch/tests/test_base.py +188 -0
- autocoder/common/search_replace_patch/tests/test_empty_line_insert.py +233 -0
- autocoder/common/search_replace_patch/tests/test_integration.py +389 -0
- autocoder/common/search_replace_patch/tests/test_manager.py +351 -0
- autocoder/common/search_replace_patch/tests/test_patch_replacer.py +316 -0
- autocoder/common/search_replace_patch/tests/test_regex_replacer.py +306 -0
- autocoder/common/search_replace_patch/tests/test_similarity_replacer.py +384 -0
- autocoder/common/shell_commands/__init__.py +197 -0
- autocoder/common/shell_commands/background_process_notifier.py +346 -0
- autocoder/common/shell_commands/command_executor.py +1127 -0
- autocoder/common/shell_commands/error_recovery.py +541 -0
- autocoder/common/shell_commands/exceptions.py +120 -0
- autocoder/common/shell_commands/interactive_executor.py +476 -0
- autocoder/common/shell_commands/interactive_pexpect_process.py +623 -0
- autocoder/common/shell_commands/interactive_process.py +744 -0
- autocoder/common/shell_commands/interactive_session_manager.py +1014 -0
- autocoder/common/shell_commands/monitoring.py +529 -0
- autocoder/common/shell_commands/process_cleanup.py +386 -0
- autocoder/common/shell_commands/process_manager.py +606 -0
- autocoder/common/shell_commands/test_interactive_pexpect_process.py +281 -0
- autocoder/common/shell_commands/tests/__init__.py +6 -0
- autocoder/common/shell_commands/tests/conftest.py +118 -0
- autocoder/common/shell_commands/tests/test_background_process_notifier.py +703 -0
- autocoder/common/shell_commands/tests/test_command_executor.py +448 -0
- autocoder/common/shell_commands/tests/test_error_recovery.py +305 -0
- autocoder/common/shell_commands/tests/test_exceptions.py +299 -0
- autocoder/common/shell_commands/tests/test_execute_batch.py +588 -0
- autocoder/common/shell_commands/tests/test_indented_batch_commands.py +244 -0
- autocoder/common/shell_commands/tests/test_integration.py +664 -0
- autocoder/common/shell_commands/tests/test_monitoring.py +546 -0
- autocoder/common/shell_commands/tests/test_performance.py +632 -0
- autocoder/common/shell_commands/tests/test_process_cleanup.py +397 -0
- autocoder/common/shell_commands/tests/test_process_manager.py +606 -0
- autocoder/common/shell_commands/tests/test_timeout_config.py +343 -0
- autocoder/common/shell_commands/tests/test_timeout_manager.py +520 -0
- autocoder/common/shell_commands/timeout_config.py +315 -0
- autocoder/common/shell_commands/timeout_manager.py +352 -0
- autocoder/common/terminal_paste/__init__.py +14 -0
- autocoder/common/terminal_paste/demo.py +145 -0
- autocoder/common/terminal_paste/demo_paste_functionality.py +95 -0
- autocoder/common/terminal_paste/paste_handler.py +200 -0
- autocoder/common/terminal_paste/paste_manager.py +118 -0
- autocoder/common/terminal_paste/tests/__init__.py +1 -0
- autocoder/common/terminal_paste/tests/test_paste_handler.py +182 -0
- autocoder/common/terminal_paste/tests/test_paste_manager.py +126 -0
- autocoder/common/terminal_paste/utils.py +163 -0
- autocoder/common/test_autocoder_args.py +232 -0
- autocoder/common/test_env_manager.py +173 -0
- autocoder/common/test_env_manager_integration.py +159 -0
- autocoder/common/text_similarity/__init__.py +9 -0
- autocoder/common/text_similarity/demo.py +216 -0
- autocoder/common/text_similarity/examples.py +266 -0
- autocoder/common/text_similarity/test_text_similarity.py +306 -0
- autocoder/common/text_similarity/text_similarity.py +194 -0
- autocoder/common/text_similarity/utils.py +125 -0
- autocoder/common/todos/__init__.py +61 -0
- autocoder/common/todos/cache/__init__.py +16 -0
- autocoder/common/todos/cache/base_cache.py +89 -0
- autocoder/common/todos/cache/cache_manager.py +228 -0
- autocoder/common/todos/cache/memory_cache.py +225 -0
- autocoder/common/todos/config.py +155 -0
- autocoder/common/todos/exceptions.py +35 -0
- autocoder/common/todos/get_todo_manager.py +161 -0
- autocoder/common/todos/manager.py +537 -0
- autocoder/common/todos/models.py +239 -0
- autocoder/common/todos/storage/__init__.py +14 -0
- autocoder/common/todos/storage/base_storage.py +76 -0
- autocoder/common/todos/storage/file_storage.py +278 -0
- autocoder/common/tokens/__init__.py +15 -0
- autocoder/common/tokens/counter.py +44 -2
- autocoder/common/tools_manager/__init__.py +17 -0
- autocoder/common/tools_manager/examples.py +162 -0
- autocoder/common/tools_manager/manager.py +385 -0
- autocoder/common/tools_manager/models.py +39 -0
- autocoder/common/tools_manager/test_tools_manager.py +303 -0
- autocoder/common/tools_manager/utils.py +191 -0
- autocoder/common/v2/agent/agentic_callbacks.py +270 -0
- autocoder/common/v2/agent/agentic_edit.py +2729 -2052
- autocoder/common/v2/agent/agentic_edit_change_manager.py +474 -0
- autocoder/common/v2/agent/agentic_edit_tools/__init__.py +43 -2
- autocoder/common/v2/agent/agentic_edit_tools/ac_mod_list_tool_resolver.py +279 -0
- autocoder/common/v2/agent/agentic_edit_tools/ac_mod_read_tool_resolver.py +40 -0
- autocoder/common/v2/agent/agentic_edit_tools/ac_mod_write_tool_resolver.py +52 -0
- autocoder/common/v2/agent/agentic_edit_tools/ask_followup_question_tool_resolver.py +8 -0
- autocoder/common/v2/agent/agentic_edit_tools/background_task_tool_resolver.py +1167 -0
- autocoder/common/v2/agent/agentic_edit_tools/base_tool_resolver.py +2 -2
- autocoder/common/v2/agent/agentic_edit_tools/conversation_message_ids_read_tool_resolver.py +214 -0
- autocoder/common/v2/agent/agentic_edit_tools/conversation_message_ids_write_tool_resolver.py +299 -0
- autocoder/common/v2/agent/agentic_edit_tools/count_tokens_tool_resolver.py +290 -0
- autocoder/common/v2/agent/agentic_edit_tools/execute_command_tool_resolver.py +565 -30
- autocoder/common/v2/agent/agentic_edit_tools/execute_workflow_tool_resolver.py +485 -0
- autocoder/common/v2/agent/agentic_edit_tools/extract_to_text_tool_resolver.py +225 -0
- autocoder/common/v2/agent/agentic_edit_tools/lint_report.py +79 -0
- autocoder/common/v2/agent/agentic_edit_tools/linter_config_models.py +343 -0
- autocoder/common/v2/agent/agentic_edit_tools/linter_enabled_tool_resolver.py +189 -0
- autocoder/common/v2/agent/agentic_edit_tools/list_files_tool_resolver.py +169 -101
- autocoder/common/v2/agent/agentic_edit_tools/load_extra_document_tool_resolver.py +349 -0
- autocoder/common/v2/agent/agentic_edit_tools/read_file_tool_resolver.py +244 -51
- autocoder/common/v2/agent/agentic_edit_tools/replace_in_file_tool_resolver.py +667 -147
- autocoder/common/v2/agent/agentic_edit_tools/run_named_subagents_tool_resolver.py +691 -0
- autocoder/common/v2/agent/agentic_edit_tools/search_files_tool_resolver.py +409 -140
- autocoder/common/v2/agent/agentic_edit_tools/session_interactive_tool_resolver.py +115 -0
- autocoder/common/v2/agent/agentic_edit_tools/session_start_tool_resolver.py +190 -0
- autocoder/common/v2/agent/agentic_edit_tools/session_stop_tool_resolver.py +76 -0
- autocoder/common/v2/agent/agentic_edit_tools/test_write_to_file_tool_resolver.py +209 -194
- autocoder/common/v2/agent/agentic_edit_tools/todo_read_tool_resolver.py +135 -0
- autocoder/common/v2/agent/agentic_edit_tools/todo_write_tool_resolver.py +328 -0
- autocoder/common/v2/agent/agentic_edit_tools/use_mcp_tool_resolver.py +2 -2
- autocoder/common/v2/agent/agentic_edit_tools/web_crawl_tool_resolver.py +557 -0
- autocoder/common/v2/agent/agentic_edit_tools/web_search_tool_resolver.py +600 -0
- autocoder/common/v2/agent/agentic_edit_tools/write_to_file_tool_resolver.py +56 -121
- autocoder/common/v2/agent/agentic_edit_types.py +386 -10
- autocoder/common/v2/agent/runner/__init__.py +31 -0
- autocoder/common/v2/agent/runner/base_runner.py +92 -0
- autocoder/common/v2/agent/runner/file_based_event_runner.py +217 -0
- autocoder/common/v2/agent/runner/sdk_runner.py +182 -0
- autocoder/common/v2/agent/runner/terminal_runner.py +396 -0
- autocoder/common/v2/agent/runner/tool_display.py +589 -0
- autocoder/common/v2/agent/test_agentic_callbacks.py +265 -0
- autocoder/common/v2/agent/test_agentic_edit.py +194 -0
- autocoder/common/v2/agent/tool_caller/__init__.py +24 -0
- autocoder/common/v2/agent/tool_caller/default_tool_resolver_map.py +135 -0
- autocoder/common/v2/agent/tool_caller/integration_test.py +172 -0
- autocoder/common/v2/agent/tool_caller/plugins/__init__.py +14 -0
- autocoder/common/v2/agent/tool_caller/plugins/base_plugin.py +126 -0
- autocoder/common/v2/agent/tool_caller/plugins/examples/__init__.py +13 -0
- autocoder/common/v2/agent/tool_caller/plugins/examples/logging_plugin.py +164 -0
- autocoder/common/v2/agent/tool_caller/plugins/examples/security_filter_plugin.py +198 -0
- autocoder/common/v2/agent/tool_caller/plugins/plugin_interface.py +141 -0
- autocoder/common/v2/agent/tool_caller/test_tool_caller.py +278 -0
- autocoder/common/v2/agent/tool_caller/tool_call_plugin_manager.py +331 -0
- autocoder/common/v2/agent/tool_caller/tool_caller.py +337 -0
- autocoder/common/v2/agent/tool_caller/usage_example.py +193 -0
- autocoder/common/v2/code_agentic_editblock_manager.py +4 -4
- autocoder/common/v2/code_auto_generate.py +136 -78
- autocoder/common/v2/code_auto_generate_diff.py +135 -79
- autocoder/common/v2/code_auto_generate_editblock.py +174 -99
- autocoder/common/v2/code_auto_generate_strict_diff.py +151 -71
- autocoder/common/v2/code_auto_merge.py +1 -1
- autocoder/common/v2/code_auto_merge_editblock.py +13 -1
- autocoder/common/v2/code_diff_manager.py +3 -3
- autocoder/common/v2/code_editblock_manager.py +4 -14
- autocoder/common/v2/code_manager.py +1 -1
- autocoder/common/v2/code_strict_diff_manager.py +2 -2
- autocoder/common/wrap_llm_hint/__init__.py +10 -0
- autocoder/common/wrap_llm_hint/test_wrap_llm_hint.py +1067 -0
- autocoder/common/wrap_llm_hint/utils.py +432 -0
- autocoder/common/wrap_llm_hint/wrap_llm_hint.py +323 -0
- autocoder/completer/__init__.py +8 -0
- autocoder/completer/command_completer_v2.py +1051 -0
- autocoder/default_project/__init__.py +501 -0
- autocoder/dispacher/__init__.py +4 -12
- autocoder/dispacher/actions/action.py +165 -7
- autocoder/dispacher/actions/plugins/action_regex_project.py +2 -2
- autocoder/index/entry.py +117 -125
- autocoder/{agent → index/filter}/agentic_filter.py +323 -334
- autocoder/index/filter/normal_filter.py +5 -11
- autocoder/index/filter/quick_filter.py +1 -1
- autocoder/index/index.py +36 -9
- autocoder/index/tests/__init__.py +1 -0
- autocoder/index/tests/run_tests.py +195 -0
- autocoder/index/tests/test_entry.py +303 -0
- autocoder/index/tests/test_index_manager.py +314 -0
- autocoder/index/tests/test_module_integration.py +300 -0
- autocoder/index/tests/test_symbols_utils.py +183 -0
- autocoder/inner/__init__.py +4 -0
- autocoder/inner/agentic.py +932 -0
- autocoder/inner/async_command_handler.py +992 -0
- autocoder/inner/conversation_command_handlers.py +623 -0
- autocoder/inner/merge_command_handler.py +213 -0
- autocoder/inner/queue_command_handler.py +684 -0
- autocoder/models.py +95 -266
- autocoder/plugins/git_helper_plugin.py +31 -29
- autocoder/plugins/token_helper_plugin.py +156 -37
- autocoder/pyproject/__init__.py +32 -29
- autocoder/rag/agentic_rag.py +215 -75
- autocoder/rag/cache/simple_cache.py +1 -2
- autocoder/rag/loaders/image_loader.py +1 -1
- autocoder/rag/long_context_rag.py +42 -26
- autocoder/rag/qa_conversation_strategy.py +1 -1
- autocoder/rag/terminal/__init__.py +17 -0
- autocoder/rag/terminal/args.py +581 -0
- autocoder/rag/terminal/bootstrap.py +61 -0
- autocoder/rag/terminal/command_handlers.py +653 -0
- autocoder/rag/terminal/formatters/__init__.py +20 -0
- autocoder/rag/terminal/formatters/base.py +70 -0
- autocoder/rag/terminal/formatters/json_format.py +66 -0
- autocoder/rag/terminal/formatters/stream_json.py +95 -0
- autocoder/rag/terminal/formatters/text.py +28 -0
- autocoder/rag/terminal/init.py +120 -0
- autocoder/rag/terminal/utils.py +106 -0
- autocoder/rag/test_agentic_rag.py +389 -0
- autocoder/rag/test_doc_filter.py +3 -3
- autocoder/rag/test_long_context_rag.py +1 -1
- autocoder/rag/test_token_limiter.py +517 -10
- autocoder/rag/token_counter.py +3 -0
- autocoder/rag/token_limiter.py +19 -15
- autocoder/rag/tools/__init__.py +26 -2
- autocoder/rag/tools/bochaai_example.py +343 -0
- autocoder/rag/tools/bochaai_sdk.py +541 -0
- autocoder/rag/tools/metaso_example.py +268 -0
- autocoder/rag/tools/metaso_sdk.py +417 -0
- autocoder/rag/tools/recall_tool.py +28 -7
- autocoder/rag/tools/run_integration_tests.py +204 -0
- autocoder/rag/tools/test_all_providers.py +318 -0
- autocoder/rag/tools/test_bochaai_integration.py +482 -0
- autocoder/rag/tools/test_final_integration.py +215 -0
- autocoder/rag/tools/test_metaso_integration.py +424 -0
- autocoder/rag/tools/test_metaso_real.py +171 -0
- autocoder/rag/tools/test_web_crawl_tool.py +639 -0
- autocoder/rag/tools/test_web_search_tool.py +509 -0
- autocoder/rag/tools/todo_read_tool.py +202 -0
- autocoder/rag/tools/todo_write_tool.py +412 -0
- autocoder/rag/tools/web_crawl_tool.py +634 -0
- autocoder/rag/tools/web_search_tool.py +558 -0
- autocoder/rag/tools/web_tools_example.py +119 -0
- autocoder/rag/types.py +16 -0
- autocoder/rag/variable_holder.py +4 -2
- autocoder/rags.py +86 -79
- autocoder/regexproject/__init__.py +23 -21
- autocoder/run_context.py +9 -0
- autocoder/sdk/__init__.py +50 -161
- autocoder/sdk/api.py +370 -0
- autocoder/sdk/async_runner/__init__.py +26 -0
- autocoder/sdk/async_runner/async_executor.py +650 -0
- autocoder/sdk/async_runner/async_handler.py +356 -0
- autocoder/sdk/async_runner/markdown_processor.py +595 -0
- autocoder/sdk/async_runner/task_metadata.py +284 -0
- autocoder/sdk/async_runner/worktree_manager.py +438 -0
- autocoder/sdk/cli/__init__.py +2 -5
- autocoder/sdk/cli/formatters.py +28 -204
- autocoder/sdk/cli/handlers.py +77 -44
- autocoder/sdk/cli/main.py +158 -170
- autocoder/sdk/cli/options.py +95 -22
- autocoder/sdk/constants.py +139 -51
- autocoder/sdk/core/auto_coder_core.py +484 -267
- autocoder/sdk/core/bridge.py +298 -118
- autocoder/sdk/exceptions.py +18 -12
- autocoder/sdk/formatters/__init__.py +19 -0
- autocoder/sdk/formatters/input.py +64 -0
- autocoder/sdk/formatters/output.py +247 -0
- autocoder/sdk/formatters/stream.py +54 -0
- autocoder/sdk/models/__init__.py +6 -5
- autocoder/sdk/models/options.py +55 -18
- autocoder/sdk/utils/formatters.py +27 -195
- autocoder/suffixproject/__init__.py +28 -25
- autocoder/terminal/__init__.py +14 -0
- autocoder/terminal/app.py +454 -0
- autocoder/terminal/args.py +32 -0
- autocoder/terminal/bootstrap.py +178 -0
- autocoder/terminal/command_processor.py +521 -0
- autocoder/terminal/command_registry.py +57 -0
- autocoder/terminal/help.py +97 -0
- autocoder/terminal/tasks/__init__.py +5 -0
- autocoder/terminal/tasks/background.py +77 -0
- autocoder/terminal/tasks/task_event.py +70 -0
- autocoder/terminal/ui/__init__.py +13 -0
- autocoder/terminal/ui/completer.py +268 -0
- autocoder/terminal/ui/keybindings.py +75 -0
- autocoder/terminal/ui/session.py +41 -0
- autocoder/terminal/ui/toolbar.py +64 -0
- autocoder/terminal/utils/__init__.py +13 -0
- autocoder/terminal/utils/errors.py +18 -0
- autocoder/terminal/utils/paths.py +19 -0
- autocoder/terminal/utils/shell.py +43 -0
- autocoder/terminal_v3/__init__.py +10 -0
- autocoder/terminal_v3/app.py +201 -0
- autocoder/terminal_v3/handlers/__init__.py +5 -0
- autocoder/terminal_v3/handlers/command_handler.py +131 -0
- autocoder/terminal_v3/models/__init__.py +6 -0
- autocoder/terminal_v3/models/conversation_buffer.py +214 -0
- autocoder/terminal_v3/models/message.py +50 -0
- autocoder/terminal_v3/models/tool_display.py +247 -0
- autocoder/terminal_v3/ui/__init__.py +7 -0
- autocoder/terminal_v3/ui/keybindings.py +56 -0
- autocoder/terminal_v3/ui/layout.py +141 -0
- autocoder/terminal_v3/ui/styles.py +43 -0
- autocoder/tsproject/__init__.py +23 -23
- autocoder/utils/auto_coder_utils/chat_stream_out.py +1 -1
- autocoder/utils/llms.py +88 -80
- autocoder/utils/math_utils.py +101 -0
- autocoder/utils/model_provider_selector.py +16 -4
- autocoder/utils/operate_config_api.py +33 -5
- autocoder/utils/thread_utils.py +2 -2
- autocoder/version.py +4 -2
- autocoder/workflow_agents/__init__.py +84 -0
- autocoder/workflow_agents/agent.py +143 -0
- autocoder/workflow_agents/exceptions.py +573 -0
- autocoder/workflow_agents/executor.py +489 -0
- autocoder/workflow_agents/loader.py +737 -0
- autocoder/workflow_agents/runner.py +267 -0
- autocoder/workflow_agents/types.py +172 -0
- autocoder/workflow_agents/utils.py +434 -0
- autocoder/workflow_agents/workflow_manager.py +211 -0
- auto_coder-0.1.400.dist-info/METADATA +0 -396
- auto_coder-0.1.400.dist-info/RECORD +0 -425
- auto_coder-0.1.400.dist-info/licenses/LICENSE +0 -201
- autocoder/auto_coder_server.py +0 -672
- autocoder/benchmark.py +0 -138
- autocoder/common/ac_style_command_parser/example.py +0 -7
- autocoder/common/cleaner.py +0 -31
- autocoder/common/command_completer_v2.py +0 -615
- autocoder/common/directory_cache/__init__.py +0 -1
- autocoder/common/directory_cache/cache.py +0 -192
- autocoder/common/directory_cache/test_cache.py +0 -190
- autocoder/common/file_checkpoint/examples.py +0 -217
- autocoder/common/llm_friendly_package_example.py +0 -138
- autocoder/common/llm_friendly_package_test.py +0 -63
- autocoder/common/pull_requests/test_module.py +0 -1
- autocoder/common/rulefiles/autocoderrules_utils.py +0 -484
- autocoder/common/text.py +0 -30
- autocoder/common/v2/agent/agentic_edit_tools/list_package_info_tool_resolver.py +0 -42
- autocoder/common/v2/agent/agentic_edit_tools/test_execute_command_tool_resolver.py +0 -70
- autocoder/common/v2/agent/agentic_edit_tools/test_search_files_tool_resolver.py +0 -163
- autocoder/common/v2/agent/agentic_tool_display.py +0 -183
- autocoder/plugins/dynamic_completion_example.py +0 -148
- autocoder/plugins/sample_plugin.py +0 -160
- autocoder/sdk/cli/__main__.py +0 -26
- autocoder/sdk/cli/completion_wrapper.py +0 -38
- autocoder/sdk/cli/install_completion.py +0 -301
- autocoder/sdk/models/messages.py +0 -209
- autocoder/sdk/session/__init__.py +0 -32
- autocoder/sdk/session/session.py +0 -106
- autocoder/sdk/session/session_manager.py +0 -56
- {auto_coder-0.1.400.dist-info → auto_coder-2.0.0.dist-info}/top_level.txt +0 -0
- /autocoder/{sdk/example.py → common/agent_query_queue/__init__.py} +0 -0
|
@@ -0,0 +1,634 @@
|
|
|
1
|
+
"""
|
|
2
|
+
WebCrawlTool Module
|
|
3
|
+
|
|
4
|
+
This module implements WebCrawlTool and WebCrawlToolResolver classes for providing
|
|
5
|
+
web crawling functionality based on Firecrawl, Metaso in the BaseAgent framework.
|
|
6
|
+
Supports concurrent crawling with multiple API keys using thread pools.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import traceback
|
|
11
|
+
import time
|
|
12
|
+
import json
|
|
13
|
+
from typing import Dict, Any, List, Optional
|
|
14
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
15
|
+
from threading import Lock
|
|
16
|
+
|
|
17
|
+
import byzerllm
|
|
18
|
+
from loguru import logger
|
|
19
|
+
|
|
20
|
+
from autocoder.agent.base_agentic.types import BaseTool, ToolResult
|
|
21
|
+
from autocoder.agent.base_agentic.tool_registry import ToolRegistry
|
|
22
|
+
from autocoder.agent.base_agentic.tools.base_tool_resolver import BaseToolResolver
|
|
23
|
+
from autocoder.agent.base_agentic.types import ToolDescription, ToolExample
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class WebCrawlTool(BaseTool):
    """Arguments for one web crawl request served by Firecrawl or Metaso.

    List-valued and boolean-valued options are carried as strings
    (comma-separated lists, "true"/"false") and are parsed by
    WebCrawlToolResolver.
    """
    url: str  # URL to crawl (required)
    limit: Optional[int] = 10  # Page crawling limit; a value of 1 selects the single-page read/scrape path
    # NOTE(review): scrape_options is not read by any resolver method in this file - confirm it is used elsewhere.
    scrape_options: Optional[str] = None  # Scraping options in JSON string format
    exclude_paths: Optional[str] = None  # Excluded paths, comma-separated
    include_paths: Optional[str] = None  # Included paths, comma-separated
    max_depth: Optional[int] = None  # Maximum crawling depth
    allow_subdomains: Optional[str] = "false"  # Whether to allow subdomains, "true"/"false" (compared case-insensitively)
    crawl_entire_domain: Optional[str] = "false"  # Whether to crawl entire domain, "true"/"false"; only forwarded to Firecrawl
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class WebCrawlToolResolver(BaseToolResolver):
|
|
39
|
+
"""Web crawling tool resolver implementing concurrent crawling logic"""
|
|
40
|
+
|
|
41
|
+
def __init__(self, agent, tool, args):
    """Initialise the resolver for one ``WebCrawlTool`` invocation.

    Args:
        agent: Forwarded unchanged to the base resolver.
        tool: The parsed tool request; kept on ``self.tool``.
        args: Forwarded unchanged to the base resolver.
    """
    super().__init__(agent, tool, args)
    # Narrow the attribute type for readers and type checkers.
    self.tool: WebCrawlTool = tool
    # Result accumulator and the lock that is meant to guard it.
    self._all_results = []
    self._results_lock = Lock()
|
|
46
|
+
|
|
47
|
+
def _get_available_providers(self) -> List[Dict[str, Any]]:
|
|
48
|
+
"""Get all available provider configurations"""
|
|
49
|
+
providers = []
|
|
50
|
+
# Check Metaso keys
|
|
51
|
+
metaso_keys = []
|
|
52
|
+
if self.args.metaso_api_key:
|
|
53
|
+
if ',' in self.args.metaso_api_key:
|
|
54
|
+
metaso_keys = [key.strip() for key in self.args.metaso_api_key.split(',') if key.strip()]
|
|
55
|
+
else:
|
|
56
|
+
metaso_keys = [self.args.metaso_api_key]
|
|
57
|
+
|
|
58
|
+
for key in metaso_keys:
|
|
59
|
+
providers.append({
|
|
60
|
+
'type': 'metaso',
|
|
61
|
+
'api_key': key,
|
|
62
|
+
'priority': 2
|
|
63
|
+
})
|
|
64
|
+
|
|
65
|
+
# Check Firecrawl keys
|
|
66
|
+
firecrawl_keys = []
|
|
67
|
+
if self.args.firecrawl_api_key:
|
|
68
|
+
if ',' in self.args.firecrawl_api_key:
|
|
69
|
+
firecrawl_keys = [key.strip() for key in self.args.firecrawl_api_key.split(',') if key.strip()]
|
|
70
|
+
else:
|
|
71
|
+
firecrawl_keys = [self.args.firecrawl_api_key]
|
|
72
|
+
|
|
73
|
+
for key in firecrawl_keys:
|
|
74
|
+
providers.append({
|
|
75
|
+
'type': 'firecrawl',
|
|
76
|
+
'api_key': key,
|
|
77
|
+
'priority': 3
|
|
78
|
+
})
|
|
79
|
+
|
|
80
|
+
# Sort by priority
|
|
81
|
+
providers.sort(key=lambda x: x['priority'])
|
|
82
|
+
return providers
|
|
83
|
+
|
|
84
|
+
def _crawl_with_metaso(self, api_key: str) -> ToolResult:
    """Crawl ``self.tool.url`` through Metaso using the given API key.

    When ``self.tool.limit`` is 1 the page is fetched with ``client.read``;
    otherwise ``client.crawl`` is called with the limit, depth, path filters
    and subdomain flag taken from the tool.

    Args:
        api_key: Metaso API key used for this request.

    Returns:
        A ToolResult whose ``content`` is a list of page dicts on success and
        an empty list on failure. Every exception is caught and reported as
        a failed ToolResult instead of being raised.
    """
    # Same masking rule as the suffix stored in result metadata: a key of
    # four characters or fewer is never written out in full.
    key_suffix = api_key[-4:] if len(api_key) > 4 else "****"

    logger.info(f"🔍 Starting Metaso crawl (key: ...{key_suffix}): {self.tool.url}")
    try:
        # Imported lazily so a missing SDK only disables this provider.
        try:
            from autocoder.rag.tools.metaso_sdk import MetasoClient
        except ImportError:
            return ToolResult(
                success=False,
                message="Metaso SDK not installed, please check dependencies",
                content=[]
            )

        client = MetasoClient(api_key=api_key)

        # Single page: a plain read is enough and needs none of the crawl options.
        if self.tool.limit == 1:
            logger.info(f"Using Metaso to read single page: {self.tool.url}")
            content = client.read(self.tool.url, format="text/plain")

            # The client reports read failures in-band as an "Error:" string.
            if content.startswith("Error:"):
                return ToolResult(
                    success=False,
                    message=f"Metaso reading failed: {content}",
                    content=[]
                )

            result_item = {
                "url": self.tool.url,
                "title": "",
                "content": content,
                "markdown": content,
                "links": [],
                "metadata": {
                    "source": "metaso",
                    "api_key_suffix": key_suffix
                }
            }

            return ToolResult(
                success=True,
                message="Successfully read 1 page (using Metaso)",
                content=[result_item]
            )

        # Multi-page crawl: translate the string-encoded tool options.
        exclude_paths_list = None
        if self.tool.exclude_paths:
            exclude_paths_list = [p.strip() for p in self.tool.exclude_paths.split(',') if p.strip()]

        include_paths_list = None
        if self.tool.include_paths:
            include_paths_list = [p.strip() for p in self.tool.include_paths.split(',') if p.strip()]

        # allow_subdomains is Optional; treat None like the default "false"
        # instead of failing the whole request on None.lower().
        allow_subdomains = (self.tool.allow_subdomains or "false").lower() == "true"

        logger.info(f"Using Metaso to start web crawling, URL: {self.tool.url}")
        crawl_results = client.crawl(
            url=self.tool.url,
            limit=self.tool.limit,
            max_depth=self.tool.max_depth,
            exclude_paths=exclude_paths_list,
            include_paths=include_paths_list,
            allow_subdomains=allow_subdomains
        )

        if not crawl_results:
            return ToolResult(
                success=False,
                message="Metaso crawling returned no results",
                content=[]
            )

        # Tag every page with the (masked) key that produced it.
        for result in crawl_results:
            result.setdefault('metadata', {})['api_key_suffix'] = key_suffix

        return ToolResult(
            success=True,
            message=f"Successfully crawled {len(crawl_results)} pages (using Metaso)",
            content=crawl_results
        )

    except Exception as e:
        logger.error(f"❌ Metaso crawling failed (key: ...{key_suffix}): {str(e)}")
        return ToolResult(
            success=False,
            message=f"Metaso crawling failed (key: ...{key_suffix}): {str(e)}",
            content=[]
        )
|
|
179
|
+
|
|
180
|
+
def _crawl_with_firecrawl(self, api_key: str) -> ToolResult:
    """Crawl ``self.tool.url`` through Firecrawl using the given API key.

    With ``self.tool.limit == 1`` a single ``scrape`` call is made. Otherwise
    ``crawl`` is called with the options derived from the tool; if that
    raises, one ``scrape`` of the start URL is attempted as a fallback and
    reported as a success whose message also carries the crawl error.

    Args:
        api_key: Firecrawl API key used for this request.

    Returns:
        A ToolResult whose ``content`` is a list of page dicts on success and
        an empty list on failure. Every exception is caught and reported as
        a failed ToolResult instead of being raised.
    """
    # Same masking rule as the suffix stored in result metadata: a key of
    # four characters or fewer is never written out in full.
    key_suffix = api_key[-4:] if len(api_key) > 4 else "****"

    def to_result_item(document, url: str) -> Dict[str, Any]:
        # Convert one Firecrawl document into the page dict shared by all providers.
        metadata = document.metadata
        title = ''
        if metadata and hasattr(metadata, 'title'):
            title = metadata.title or ''

        item = {
            "url": url,
            "title": title,
            "content": document.markdown or '',
            "markdown": document.markdown or '',
            "links": document.links or [],
            "metadata": {
                "source": "firecrawl",
                "api_key_suffix": key_suffix
            }
        }

        # Copying the provider's own metadata is best-effort on every path:
        # a page is still returned if its metadata cannot be exported.
        if metadata:
            try:
                if hasattr(metadata, 'dict'):
                    item['metadata'].update(metadata.dict())
                elif hasattr(metadata, '__dict__'):
                    item['metadata'].update(metadata.__dict__)
            except Exception as e:
                logger.warning(f"Failed to extract metadata: {e}")

        return item

    def source_url_of(document) -> str:
        # Crawled documents carry their own URL in metadata.source_url.
        metadata = document.metadata
        if metadata and hasattr(metadata, 'source_url'):
            return metadata.source_url or ''
        return ''

    logger.info(f"🔥 Starting Firecrawl crawl (key: ...{key_suffix}): {self.tool.url}")
    try:
        # The SDK exposes its client under different names; take whichever exists.
        try:
            from firecrawl import FirecrawlApp as client_cls
            client_name = "FirecrawlApp"
        except ImportError:
            try:
                from firecrawl import Firecrawl as client_cls
                client_name = "Firecrawl"
            except ImportError:
                return ToolResult(
                    success=False,
                    message="Firecrawl SDK not installed, please run: pip install firecrawl-py",
                    content=[]
                )

        firecrawl = client_cls(api_key=api_key)
        logger.info(f"Using {client_name} class")

        # NOTE(review): self.tool.scrape_options is never forwarded to the
        # SDK here; 'markdown' is always requested - confirm this is intended.

        # Single page: one scrape call, no crawl job.
        if self.tool.limit == 1:
            logger.info("Using simple scrape for single page")
            scrape_result = firecrawl.scrape(self.tool.url, formats=['markdown'])

            if not scrape_result:
                return ToolResult(
                    success=False,
                    message="Firecrawl scrape returned empty result",
                    content=[]
                )

            return ToolResult(
                success=True,
                message=f"Successfully scraped 1 page using Firecrawl (key: ...{key_suffix})",
                content=[to_result_item(scrape_result, self.tool.url)]
            )

        logger.info(f"Using multi-page crawling for {self.tool.limit} pages")

        try:
            # Only options that were actually provided are passed on.
            crawl_options = {}
            if self.tool.max_depth is not None:
                crawl_options['max_discovery_depth'] = self.tool.max_depth
            if self.tool.limit is not None:
                crawl_options['limit'] = self.tool.limit
            if self.tool.exclude_paths:
                crawl_options['exclude_paths'] = [p.strip() for p in self.tool.exclude_paths.split(',') if p.strip()]
            if self.tool.include_paths:
                crawl_options['include_paths'] = [p.strip() for p in self.tool.include_paths.split(',') if p.strip()]
            if self.tool.allow_subdomains is not None:
                crawl_options['allow_subdomains'] = self.tool.allow_subdomains.lower() == "true"
            if self.tool.crawl_entire_domain is not None:
                crawl_options['crawl_entire_domain'] = self.tool.crawl_entire_domain.lower() == "true"

            logger.info(f"Starting crawl with options: {crawl_options}")
            crawl_result = firecrawl.crawl(self.tool.url, **crawl_options)

            # The crawl result exposes the crawled documents on .data.
            data = crawl_result.data or []
            logger.info(f"Processing {len(data)} crawled documents")

            crawl_results = [to_result_item(doc, source_url_of(doc)) for doc in data]

            return ToolResult(
                success=True,
                message=f"Successfully crawled {len(crawl_results)} pages using Firecrawl (key: ...{key_suffix})",
                content=crawl_results
            )

        except Exception as crawl_error:
            logger.error(f"Multi-page crawling failed: {crawl_error}")
            # Degrade to scraping just the start URL rather than returning nothing.
            logger.info("Attempting fallback to single page scrape")
            try:
                scrape_result = firecrawl.scrape(self.tool.url, formats=['markdown'])
                fallback_item = to_result_item(scrape_result, self.tool.url)

                return ToolResult(
                    success=True,
                    message=f"Fallback successful: scraped 1 page using Firecrawl (key: ...{key_suffix}) - multi-page failed: {str(crawl_error)}",
                    content=[fallback_item]
                )
            except Exception as fallback_error:
                logger.error(f"Fallback scrape failed: {fallback_error}")
                return ToolResult(
                    success=False,
                    message=f"Both crawling and fallback failed. Crawl error: {str(crawl_error)}, Fallback error: {str(fallback_error)}",
                    content=[]
                )

    except Exception as e:
        logger.error(f"❌ Firecrawl crawling failed (key: ...{key_suffix}): {str(e)}")
        return ToolResult(
            success=False,
            message=f"Firecrawl crawling failed (key: ...{key_suffix}): {str(e)}",
            content=[]
        )
|
|
373
|
+
|
|
374
|
+
def _crawl_with_provider(self, provider: Dict[str, Any]) -> ToolResult:
|
|
375
|
+
"""Use specified provider for crawling"""
|
|
376
|
+
provider_type = provider['type']
|
|
377
|
+
api_key = provider['api_key']
|
|
378
|
+
|
|
379
|
+
if provider_type == 'metaso':
|
|
380
|
+
return self._crawl_with_metaso(api_key)
|
|
381
|
+
elif provider_type == 'firecrawl':
|
|
382
|
+
return self._crawl_with_firecrawl(api_key)
|
|
383
|
+
else:
|
|
384
|
+
return ToolResult(
|
|
385
|
+
success=False,
|
|
386
|
+
message=f"Unsupported provider type: {provider_type}",
|
|
387
|
+
content=[]
|
|
388
|
+
)
|
|
389
|
+
|
|
390
|
+
def _merge_results(self, results: List[ToolResult]) -> ToolResult:
    """Combine the per-provider crawl results into one ToolResult.

    If no result succeeded, a failed ToolResult listing every error message
    is returned. Otherwise the pages of all successful results are
    concatenated and de-duplicated on the (url, source) pair, so the same
    URL delivered by two different sources is kept twice. Pages without a
    URL are always kept.

    Args:
        results: One ToolResult per provider that was run.

    Returns:
        The merged ToolResult; its message names the sources used and, when
        some providers failed, how many did.
    """
    succeeded, failed = [], []
    for outcome in results:
        (succeeded if outcome.success else failed).append(outcome)

    logger.info(f"📄 Merging results: {len(succeeded)} successful, {len(failed)} failed")

    if not succeeded:
        # Nothing to merge: surface every error at once.
        joined_errors = '; '.join([outcome.message for outcome in failed])
        return ToolResult(
            success=False,
            message=f"All crawling requests failed: {joined_errors}",
            content=[]
        )

    # Gather pages and work out which sources contributed.
    pages = []
    sources_seen = set()
    for outcome in succeeded:
        if not outcome.content:
            continue
        pages.extend(outcome.content)
        for page in outcome.content:
            if 'metadata' not in page:
                continue
            if 'source' in page['metadata']:
                sources_seen.add(page['metadata']['source'])
            elif 'api_key_suffix' in page['metadata']:
                # No explicit source tag: fall back to the provider name
                # mentioned in the result message.
                lowered = outcome.message.lower()
                if 'metaso' in lowered:
                    sources_seen.add('metaso')
                elif 'firecrawl' in lowered:
                    sources_seen.add('firecrawl')

    # Drop repeats of the same URL from the same source only.
    seen_pairs = set()
    unique_content = []
    for page in pages:
        page_url = page.get('url', '')
        page_source = page.get('metadata', {}).get('source', 'unknown')
        if page_url:
            pair = (page_url, page_source)
            if pair in seen_pairs:
                continue
            seen_pairs.add(pair)
        unique_content.append(page)

    # Report URLs that remain more than once, i.e. came from several sources.
    occurrences = {}
    for page in unique_content:
        page_url = page.get('url', '')
        if page_url:
            occurrences[page_url] = occurrences.get(page_url, 0) + 1

    shared_urls = [page_url for page_url, seen in occurrences.items() if seen > 1]
    if shared_urls:
        ellipsis = '...' if len(shared_urls) > 3 else ''
        logger.info(f"📊 Found {len(shared_urls)} URLs crawled by multiple sources: {shared_urls[:3]}{ellipsis}")

    if sources_seen:
        providers_str = ', '.join(sorted(sources_seen))
    else:
        providers_str = 'unknown'

    message = f"Successfully crawled {len(unique_content)} pages (using {providers_str})"

    fail_count = len(failed)
    if fail_count > 0:
        # Label each failure with the provider named in its message.
        failed_details = []
        for outcome in failed:
            lowered = outcome.message.lower()
            if 'metaso' in lowered:
                label = "Metaso"
            elif 'firecrawl' in lowered:
                label = "Firecrawl"
            else:
                label = "Unknown"
            failed_details.append(f"{label}: {outcome.message}")

        message += f", {fail_count} API keys failed"
        if failed_details:
            joined_details = '; '.join(failed_details)
            logger.warning(f"❌ 失败的 API 详情: {joined_details}")
            # Keep the message short: details are inlined only for a single failure.
            if len(failed_details) == 1:
                message += f" ({failed_details[0]})"

    return ToolResult(
        success=True,
        message=message,
        content=unique_content
    )
|
|
481
|
+
|
|
482
|
+
def resolve(self) -> ToolResult:
    """Run the crawl with every configured provider and merge the outcome.

    * No provider configured: returns a failed ToolResult whose message and
      ``content`` dict suggest equivalent ``curl`` / ``wget`` commands.
    * Exactly one provider: its result is returned directly.
    * Several providers: all are run concurrently (at most 5 threads) and
      their results are combined by ``_merge_results``.

    Returns:
        The ToolResult described above. Unexpected exceptions are caught and
        returned as a failed ToolResult whose ``content`` is the traceback
        text.
    """
    import shlex  # function-scope import: only the no-provider hints need it

    def key_suffix(api_key: str) -> str:
        # A key of four characters or fewer is masked so logs never hold a whole key.
        return api_key[-4:] if len(api_key) > 4 else "****"

    try:
        providers = self._get_available_providers()

        if not providers:
            # No API key configured: point the caller at plain shell tools.
            # The URL is shell-quoted because these commands may be pasted
            # or executed verbatim and the URL is caller-supplied.
            url_arg = shlex.quote(self.tool.url)
            curl_command = f"curl -s -L {url_arg}"
            wget_command = (
                f"wget --recursive --level={self.tool.max_depth or 2} --no-parent "
                f"--reject='*.css,*.js,*.png,*.jpg,*.gif,*.pdf' --user-agent='Mozilla/5.0' {url_arg}"
            )

            curl_suggestions = [
                f"{curl_command} # Basic web content retrieval",
                f"curl -s -L -H 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' {url_arg} # Add user agent",
                f"curl -s -L --max-time 30 {url_arg} # Set timeout",
                f"curl -s -L -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' {url_arg} # Set Accept header",
            ]

            if self.tool.limit == 1:
                curl_suggestions.append("# For single page scraping, you can use curl directly")
            else:
                curl_suggestions.append("# For multi-page crawling, recommend using wget recursive download:")
                curl_suggestions.append(wget_command)

            if self.tool.exclude_paths:
                curl_suggestions.append(f"# Exclude paths: {self.tool.exclude_paths}")
                curl_suggestions.append("# Can use wget's --exclude-directories option")

            if self.tool.include_paths:
                curl_suggestions.append(f"# Include paths: {self.tool.include_paths}")
                curl_suggestions.append("# Can use wget's --include-directories option")

            suggestion_text = "\n".join(curl_suggestions)

            return ToolResult(
                success=False,
                message=f"No web crawling API key configured (Metaso, Firecrawl). Recommend using curl or wget commands to get web content:\n\n{suggestion_text}\n\nConfiguration instructions:\n- Metaso: Set --metaso_api_key parameter\n- Firecrawl: Set --firecrawl_api_key parameter",
                content={
                    "suggested_commands": curl_suggestions,
                    "target_url": self.tool.url,
                    "curl_basic": curl_command,
                    "wget_recursive": wget_command
                }
            )

        logger.info(f"🚀 Found {len(providers)} available API configurations, starting concurrent crawling")
        for i, provider in enumerate(providers, 1):
            logger.info(f" {i}. {provider['type'].upper()} API (key: ...{key_suffix(provider['api_key'])})")

        # A single provider needs no thread pool.
        if len(providers) == 1:
            logger.info(f"📝 Using single provider: {providers[0]['type'].upper()}")
            return self._crawl_with_provider(providers[0])

        logger.info(f"🏁 Starting concurrent execution with {len(providers)} providers")
        results = []
        max_workers = min(len(providers), 5)  # Limit maximum concurrency

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_provider = {
                executor.submit(self._crawl_with_provider, provider): provider
                for provider in providers
            }

            for future in as_completed(future_to_provider):
                provider = future_to_provider[future]
                suffix = key_suffix(provider['api_key'])
                try:
                    # as_completed only yields finished futures, so result()
                    # returns at once; a timeout argument here could never fire.
                    # NOTE(review): no overall deadline is enforced - any time
                    # limit has to come from the provider clients themselves.
                    result = future.result()
                    results.append(result)
                    if result.success:
                        logger.info(f"✅ Provider {provider['type']} (key: ...{suffix}) completed successfully")
                    else:
                        logger.warning(f"❌ Provider {provider['type']} (key: ...{suffix}) failed: {result.message}")
                except Exception as e:
                    # A crashing worker counts as one failed provider.
                    logger.error(f"❌ Provider {provider['type']} (key: ...{suffix}) execution exception: {str(e)}")
                    results.append(ToolResult(
                        success=False,
                        message=f"Provider {provider['type']} execution exception: {str(e)}",
                        content=[]
                    ))

        return self._merge_results(results)

    except Exception as e:
        logger.error(f"Web crawling tool execution failed: {str(e)}")
        return ToolResult(
            success=False,
            message=f"Web crawling tool execution failed: {str(e)}",
            content=traceback.format_exc()
        )
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
class WebCrawlToolDescGenerator:
    # Produces the description text of the web_crawl tool; used by
    # register_web_crawl_tool via ``web_crawl_description.prompt()``.

    def __init__(self, params: Dict[str, Any]):
        # Kept as-is and returned unchanged by web_crawl_description().
        self.params = params

    # NOTE(review): the docstring below is presumably the template text that
    # byzerllm.prompt() renders (with the returned dict as its variables), so
    # it is runtime content, not documentation - confirm before editing it.
    @byzerllm.prompt()
    def web_crawl_description(self) -> Dict:
        """
        Description: Request to perform deep web crawling using Firecrawl, Metaso API to recursively crawl website sub-pages. Use this when you need to extract comprehensive content from multiple related pages on a website, such as documentation sites or knowledge bases.
        Parameters:
        - url: (required) The URL to start crawling from
        - limit: (optional) Maximum number of pages to crawl (default: 10)
        - scrape_options: (optional) Additional scraping options as JSON string, e.g., '{"formats": ["markdown", "links"]}'
        - exclude_paths: (optional) Comma-separated path patterns to exclude from crawling, e.g., "/api,/admin"
        - include_paths: (optional) Comma-separated path patterns to include in crawling, e.g., "/docs,/help"
        - max_depth: (optional) Maximum crawling depth from the starting URL
        - allow_subdomains: (optional) Whether to allow crawling subdomains, "true" or "false" (default: "false")
        - crawl_entire_domain: (optional) Whether to crawl the entire domain, "true" or "false" (default: "false") (Firecrawl only)
        Usage:
        <web_crawl>
        <url>https://example.com</url>
        <limit>10</limit>
        <scrape_options>{"formats": ["markdown", "links"]}</scrape_options>
        </web_crawl>
        """
        return self.params
|
|
604
|
+
|
|
605
|
+
|
|
606
|
+
def register_web_crawl_tool():
    """Register the ``web_crawl`` tag, its tool class and resolver with ToolRegistry."""
    # Description text comes from the prompt-decorated generator method.
    generator = WebCrawlToolDescGenerator({})
    tool_description = ToolDescription(
        description=generator.web_crawl_description.prompt()
    )

    # Sample invocation shown alongside the description.
    example_lines = [
        "<web_crawl>",
        "<url>https://docs.firecrawl.dev</url>",
        "<limit>5</limit>",
        '<scrape_options>{"formats": ["markdown", "links"]}</scrape_options>',
        "<exclude_paths>/api,/admin</exclude_paths>",
        "</web_crawl>",
    ]
    usage_example = ToolExample(
        title="Web crawl tool usage example",
        body="\n".join(example_lines)
    )

    guideline = "Use this tool for deep crawling of website content, recursively retrieving multiple pages from a site. Supports setting crawl depth, excluding paths, and other advanced options. Ideal for gathering comprehensive content from documentation sites, knowledge bases, or other structured websites."

    ToolRegistry.register_tool(
        tool_tag="web_crawl",  # XML tag name
        tool_cls=WebCrawlTool,
        resolver_cls=WebCrawlToolResolver,
        description=tool_description,
        example=usage_example,
        use_guideline=guideline
    )
|