auto-coder 1.0.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic. Click here for more details.
- auto_coder-2.0.0.dist-info/LICENSE +158 -0
- auto_coder-2.0.0.dist-info/METADATA +558 -0
- auto_coder-2.0.0.dist-info/RECORD +795 -0
- {auto_coder-1.0.0.dist-info → auto_coder-2.0.0.dist-info}/WHEEL +1 -1
- {auto_coder-1.0.0.dist-info → auto_coder-2.0.0.dist-info}/entry_points.txt +3 -3
- autocoder/__init__.py +31 -0
- autocoder/agent/auto_filegroup.py +32 -13
- autocoder/agent/auto_learn_from_commit.py +9 -1
- autocoder/agent/base_agentic/__init__.py +3 -0
- autocoder/agent/base_agentic/agent_hub.py +1 -1
- autocoder/agent/base_agentic/base_agent.py +235 -136
- autocoder/agent/base_agentic/default_tools.py +119 -118
- autocoder/agent/base_agentic/test_base_agent.py +1 -1
- autocoder/agent/base_agentic/tool_registry.py +32 -20
- autocoder/agent/base_agentic/tools/read_file_tool_resolver.py +24 -3
- autocoder/agent/base_agentic/tools/write_to_file_tool_resolver.py +24 -11
- autocoder/agent/base_agentic/types.py +42 -0
- autocoder/agent/entry_command_agent/chat.py +73 -59
- autocoder/auto_coder.py +31 -40
- autocoder/auto_coder_rag.py +11 -1084
- autocoder/auto_coder_runner.py +970 -2345
- autocoder/auto_coder_terminal.py +26 -0
- autocoder/auto_coder_terminal_v3.py +190 -0
- autocoder/chat/conf_command.py +224 -124
- autocoder/chat/models_command.py +361 -299
- autocoder/chat/rules_command.py +79 -31
- autocoder/chat_auto_coder.py +988 -398
- autocoder/chat_auto_coder_lang.py +23 -732
- autocoder/commands/auto_command.py +25 -8
- autocoder/commands/auto_web.py +1 -1
- autocoder/commands/tools.py +44 -44
- autocoder/common/__init__.py +150 -128
- autocoder/common/ac_style_command_parser/__init__.py +39 -2
- autocoder/common/ac_style_command_parser/config.py +422 -0
- autocoder/common/ac_style_command_parser/parser.py +292 -78
- autocoder/common/ac_style_command_parser/test_parser.py +241 -16
- autocoder/common/ac_style_command_parser/test_typed_parser.py +342 -0
- autocoder/common/ac_style_command_parser/typed_parser.py +653 -0
- autocoder/common/action_yml_file_manager.py +25 -13
- autocoder/common/agent_events/__init__.py +52 -0
- autocoder/common/agent_events/agent_event_emitter.py +193 -0
- autocoder/common/agent_events/event_factory.py +177 -0
- autocoder/common/agent_events/examples.py +307 -0
- autocoder/common/agent_events/types.py +113 -0
- autocoder/common/agent_events/utils.py +68 -0
- autocoder/common/agent_hooks/__init__.py +44 -0
- autocoder/common/agent_hooks/examples.py +582 -0
- autocoder/common/agent_hooks/hook_executor.py +217 -0
- autocoder/common/agent_hooks/hook_manager.py +288 -0
- autocoder/common/agent_hooks/types.py +133 -0
- autocoder/common/agent_hooks/utils.py +99 -0
- autocoder/common/agent_query_queue/queue_executor.py +324 -0
- autocoder/common/agent_query_queue/queue_manager.py +325 -0
- autocoder/common/agents/__init__.py +11 -0
- autocoder/common/agents/agent_manager.py +323 -0
- autocoder/common/agents/agent_parser.py +189 -0
- autocoder/common/agents/example_usage.py +344 -0
- autocoder/common/agents/integration_example.py +330 -0
- autocoder/common/agents/test_agent_parser.py +545 -0
- autocoder/common/async_utils.py +101 -0
- autocoder/common/auto_coder_lang.py +23 -972
- autocoder/common/autocoderargs_parser/__init__.py +14 -0
- autocoder/common/autocoderargs_parser/parser.py +184 -0
- autocoder/common/autocoderargs_parser/tests/__init__.py +1 -0
- autocoder/common/autocoderargs_parser/tests/test_args_parser.py +235 -0
- autocoder/common/autocoderargs_parser/tests/test_token_parser.py +195 -0
- autocoder/common/autocoderargs_parser/token_parser.py +290 -0
- autocoder/common/buildin_tokenizer.py +2 -4
- autocoder/common/code_auto_generate.py +149 -74
- autocoder/common/code_auto_generate_diff.py +163 -70
- autocoder/common/code_auto_generate_editblock.py +179 -89
- autocoder/common/code_auto_generate_strict_diff.py +167 -72
- autocoder/common/code_auto_merge_editblock.py +13 -6
- autocoder/common/code_modification_ranker.py +1 -1
- autocoder/common/command_completer.py +3 -3
- autocoder/common/command_file_manager/manager.py +183 -47
- autocoder/common/command_file_manager/test_command_file_manager.py +507 -0
- autocoder/common/command_templates.py +1 -1
- autocoder/common/conf_utils.py +2 -4
- autocoder/common/conversations/config.py +11 -3
- autocoder/common/conversations/get_conversation_manager.py +100 -2
- autocoder/common/conversations/llm_stats_models.py +264 -0
- autocoder/common/conversations/manager.py +112 -28
- autocoder/common/conversations/models.py +16 -2
- autocoder/common/conversations/storage/index_manager.py +134 -10
- autocoder/common/core_config/__init__.py +63 -0
- autocoder/common/core_config/agentic_mode_manager.py +109 -0
- autocoder/common/core_config/base_manager.py +123 -0
- autocoder/common/core_config/compatibility.py +151 -0
- autocoder/common/core_config/config_manager.py +156 -0
- autocoder/common/core_config/conversation_manager.py +31 -0
- autocoder/common/core_config/exclude_manager.py +72 -0
- autocoder/common/core_config/file_manager.py +177 -0
- autocoder/common/core_config/human_as_model_manager.py +129 -0
- autocoder/common/core_config/lib_manager.py +54 -0
- autocoder/common/core_config/main_manager.py +81 -0
- autocoder/common/core_config/mode_manager.py +126 -0
- autocoder/common/core_config/models.py +70 -0
- autocoder/common/core_config/test_memory_manager.py +1056 -0
- autocoder/common/env_manager.py +282 -0
- autocoder/common/env_manager_usage_example.py +211 -0
- autocoder/common/file_checkpoint/conversation_checkpoint.py +19 -19
- autocoder/common/file_checkpoint/manager.py +264 -48
- autocoder/common/file_checkpoint/test_backup.py +1 -18
- autocoder/common/file_checkpoint/test_manager.py +270 -1
- autocoder/common/file_checkpoint/test_store.py +1 -17
- autocoder/common/file_handler/__init__.py +23 -0
- autocoder/common/file_handler/active_context_handler.py +159 -0
- autocoder/common/file_handler/add_files_handler.py +409 -0
- autocoder/common/file_handler/chat_handler.py +180 -0
- autocoder/common/file_handler/coding_handler.py +401 -0
- autocoder/common/file_handler/commit_handler.py +200 -0
- autocoder/common/file_handler/lib_handler.py +156 -0
- autocoder/common/file_handler/list_files_handler.py +111 -0
- autocoder/common/file_handler/mcp_handler.py +268 -0
- autocoder/common/file_handler/models_handler.py +493 -0
- autocoder/common/file_handler/remove_files_handler.py +172 -0
- autocoder/common/git_utils.py +44 -8
- autocoder/common/global_cancel.py +15 -6
- autocoder/common/ignorefiles/test_ignore_file_utils.py +1 -1
- autocoder/common/international/__init__.py +31 -0
- autocoder/common/international/demo_international.py +92 -0
- autocoder/common/international/message_manager.py +157 -0
- autocoder/common/international/messages/__init__.py +56 -0
- autocoder/common/international/messages/async_command_messages.py +507 -0
- autocoder/common/international/messages/auto_coder_messages.py +2208 -0
- autocoder/common/international/messages/chat_auto_coder_messages.py +1547 -0
- autocoder/common/international/messages/command_help_messages.py +986 -0
- autocoder/common/international/messages/conversation_command_messages.py +191 -0
- autocoder/common/international/messages/git_helper_plugin_messages.py +159 -0
- autocoder/common/international/messages/queue_command_messages.py +751 -0
- autocoder/common/international/messages/rules_command_messages.py +77 -0
- autocoder/common/international/messages/sdk_messages.py +1707 -0
- autocoder/common/international/messages/token_helper_plugin_messages.py +361 -0
- autocoder/common/international/messages/tool_display_messages.py +1212 -0
- autocoder/common/international/messages/workflow_exception_messages.py +473 -0
- autocoder/common/international/test_international.py +612 -0
- autocoder/common/linter_core/__init__.py +28 -0
- autocoder/common/linter_core/base_linter.py +61 -0
- autocoder/common/linter_core/config_loader.py +271 -0
- autocoder/common/linter_core/formatters/__init__.py +0 -0
- autocoder/common/linter_core/formatters/base_formatter.py +38 -0
- autocoder/common/linter_core/formatters/raw_formatter.py +17 -0
- autocoder/common/linter_core/linter.py +166 -0
- autocoder/common/linter_core/linter_factory.py +216 -0
- autocoder/common/linter_core/linter_manager.py +333 -0
- autocoder/common/linter_core/linters/__init__.py +9 -0
- autocoder/common/linter_core/linters/java_linter.py +342 -0
- autocoder/common/linter_core/linters/python_linter.py +115 -0
- autocoder/common/linter_core/linters/typescript_linter.py +119 -0
- autocoder/common/linter_core/models/__init__.py +7 -0
- autocoder/common/linter_core/models/lint_result.py +91 -0
- autocoder/common/linter_core/models.py +33 -0
- autocoder/common/linter_core/tests/__init__.py +3 -0
- autocoder/common/linter_core/tests/test_config_loader.py +323 -0
- autocoder/common/linter_core/tests/test_config_loading.py +308 -0
- autocoder/common/linter_core/tests/test_factory_manager.py +234 -0
- autocoder/common/linter_core/tests/test_formatters.py +147 -0
- autocoder/common/linter_core/tests/test_integration.py +317 -0
- autocoder/common/linter_core/tests/test_java_linter.py +496 -0
- autocoder/common/linter_core/tests/test_linters.py +265 -0
- autocoder/common/linter_core/tests/test_models.py +81 -0
- autocoder/common/linter_core/tests/verify_config_loading.py +296 -0
- autocoder/common/linter_core/tests/verify_fixes.py +183 -0
- autocoder/common/llm_friendly_package/__init__.py +31 -0
- autocoder/common/llm_friendly_package/base_manager.py +102 -0
- autocoder/common/llm_friendly_package/docs_manager.py +121 -0
- autocoder/common/llm_friendly_package/library_manager.py +171 -0
- autocoder/common/{llm_friendly_package.py → llm_friendly_package/main_manager.py} +204 -231
- autocoder/common/llm_friendly_package/models.py +40 -0
- autocoder/common/llm_friendly_package/test_llm_friendly_package.py +536 -0
- autocoder/common/llms/__init__.py +15 -0
- autocoder/common/llms/demo_error_handling.py +85 -0
- autocoder/common/llms/factory.py +142 -0
- autocoder/common/llms/manager.py +264 -0
- autocoder/common/llms/pricing.py +121 -0
- autocoder/common/llms/registry.py +288 -0
- autocoder/common/llms/schema.py +77 -0
- autocoder/common/llms/simple_demo.py +45 -0
- autocoder/common/llms/test_quick_model.py +116 -0
- autocoder/common/llms/test_remove_functionality.py +182 -0
- autocoder/common/llms/tests/__init__.py +1 -0
- autocoder/common/llms/tests/test_manager.py +330 -0
- autocoder/common/llms/tests/test_registry.py +364 -0
- autocoder/common/mcp_tools/__init__.py +62 -0
- autocoder/common/{mcp_tools.py → mcp_tools/executor.py} +49 -40
- autocoder/common/{mcp_hub.py → mcp_tools/hub.py} +42 -68
- autocoder/common/{mcp_server_install.py → mcp_tools/installer.py} +16 -28
- autocoder/common/{mcp_server.py → mcp_tools/server.py} +176 -48
- autocoder/common/mcp_tools/test_keyboard_interrupt.py +93 -0
- autocoder/common/mcp_tools/test_mcp_tools.py +391 -0
- autocoder/common/{mcp_server_types.py → mcp_tools/types.py} +121 -48
- autocoder/common/mcp_tools/verify_functionality.py +202 -0
- autocoder/common/model_speed_tester.py +32 -26
- autocoder/common/priority_directory_finder/__init__.py +142 -0
- autocoder/common/priority_directory_finder/examples.py +230 -0
- autocoder/common/priority_directory_finder/finder.py +283 -0
- autocoder/common/priority_directory_finder/models.py +236 -0
- autocoder/common/priority_directory_finder/test_priority_directory_finder.py +431 -0
- autocoder/common/project_scanner/__init__.py +18 -0
- autocoder/common/project_scanner/compat.py +77 -0
- autocoder/common/project_scanner/scanner.py +436 -0
- autocoder/common/project_tracker/__init__.py +27 -0
- autocoder/common/project_tracker/api.py +228 -0
- autocoder/common/project_tracker/demo.py +272 -0
- autocoder/common/project_tracker/tracker.py +487 -0
- autocoder/common/project_tracker/types.py +53 -0
- autocoder/common/pruner/__init__.py +67 -0
- autocoder/common/pruner/agentic_conversation_pruner.py +651 -102
- autocoder/common/pruner/conversation_message_ids_api.py +386 -0
- autocoder/common/pruner/conversation_message_ids_manager.py +347 -0
- autocoder/common/pruner/conversation_message_ids_pruner.py +473 -0
- autocoder/common/pruner/conversation_normalizer.py +347 -0
- autocoder/common/pruner/conversation_pruner.py +26 -6
- autocoder/common/pruner/test_agentic_conversation_pruner.py +554 -112
- autocoder/common/pruner/test_conversation_normalizer.py +502 -0
- autocoder/common/pruner/test_tool_content_detector.py +324 -0
- autocoder/common/pruner/tool_content_detector.py +227 -0
- autocoder/common/pruner/tools/__init__.py +18 -0
- autocoder/common/pruner/tools/query_message_ids.py +264 -0
- autocoder/common/pruner/tools/test_agentic_pruning_logic.py +432 -0
- autocoder/common/pruner/tools/test_message_ids_pruning_only.py +192 -0
- autocoder/common/pull_requests/__init__.py +9 -1
- autocoder/common/pull_requests/utils.py +122 -1
- autocoder/common/rag_manager/rag_manager.py +36 -40
- autocoder/common/rulefiles/__init__.py +53 -1
- autocoder/common/rulefiles/api.py +250 -0
- autocoder/common/rulefiles/core/__init__.py +14 -0
- autocoder/common/rulefiles/core/manager.py +241 -0
- autocoder/common/rulefiles/core/selector.py +805 -0
- autocoder/common/rulefiles/models/__init__.py +20 -0
- autocoder/common/rulefiles/models/index.py +16 -0
- autocoder/common/rulefiles/models/init_rule.py +18 -0
- autocoder/common/rulefiles/models/rule_file.py +18 -0
- autocoder/common/rulefiles/models/rule_relevance.py +14 -0
- autocoder/common/rulefiles/models/summary.py +16 -0
- autocoder/common/rulefiles/test_rulefiles.py +776 -0
- autocoder/common/rulefiles/utils/__init__.py +34 -0
- autocoder/common/rulefiles/utils/monitor.py +86 -0
- autocoder/common/rulefiles/utils/parser.py +230 -0
- autocoder/common/save_formatted_log.py +67 -10
- autocoder/common/search_replace.py +8 -1
- autocoder/common/search_replace_patch/__init__.py +24 -0
- autocoder/common/search_replace_patch/base.py +115 -0
- autocoder/common/search_replace_patch/manager.py +248 -0
- autocoder/common/search_replace_patch/patch_replacer.py +304 -0
- autocoder/common/search_replace_patch/similarity_replacer.py +306 -0
- autocoder/common/search_replace_patch/string_replacer.py +181 -0
- autocoder/common/search_replace_patch/tests/__init__.py +3 -0
- autocoder/common/search_replace_patch/tests/run_tests.py +126 -0
- autocoder/common/search_replace_patch/tests/test_base.py +188 -0
- autocoder/common/search_replace_patch/tests/test_empty_line_insert.py +233 -0
- autocoder/common/search_replace_patch/tests/test_integration.py +389 -0
- autocoder/common/search_replace_patch/tests/test_manager.py +351 -0
- autocoder/common/search_replace_patch/tests/test_patch_replacer.py +316 -0
- autocoder/common/search_replace_patch/tests/test_regex_replacer.py +306 -0
- autocoder/common/search_replace_patch/tests/test_similarity_replacer.py +384 -0
- autocoder/common/shell_commands/__init__.py +197 -0
- autocoder/common/shell_commands/background_process_notifier.py +346 -0
- autocoder/common/shell_commands/command_executor.py +1127 -0
- autocoder/common/shell_commands/error_recovery.py +541 -0
- autocoder/common/shell_commands/exceptions.py +120 -0
- autocoder/common/shell_commands/interactive_executor.py +476 -0
- autocoder/common/shell_commands/interactive_pexpect_process.py +623 -0
- autocoder/common/shell_commands/interactive_process.py +744 -0
- autocoder/common/shell_commands/interactive_session_manager.py +1014 -0
- autocoder/common/shell_commands/monitoring.py +529 -0
- autocoder/common/shell_commands/process_cleanup.py +386 -0
- autocoder/common/shell_commands/process_manager.py +606 -0
- autocoder/common/shell_commands/test_interactive_pexpect_process.py +281 -0
- autocoder/common/shell_commands/tests/__init__.py +6 -0
- autocoder/common/shell_commands/tests/conftest.py +118 -0
- autocoder/common/shell_commands/tests/test_background_process_notifier.py +703 -0
- autocoder/common/shell_commands/tests/test_command_executor.py +448 -0
- autocoder/common/shell_commands/tests/test_error_recovery.py +305 -0
- autocoder/common/shell_commands/tests/test_exceptions.py +299 -0
- autocoder/common/shell_commands/tests/test_execute_batch.py +588 -0
- autocoder/common/shell_commands/tests/test_indented_batch_commands.py +244 -0
- autocoder/common/shell_commands/tests/test_integration.py +664 -0
- autocoder/common/shell_commands/tests/test_monitoring.py +546 -0
- autocoder/common/shell_commands/tests/test_performance.py +632 -0
- autocoder/common/shell_commands/tests/test_process_cleanup.py +397 -0
- autocoder/common/shell_commands/tests/test_process_manager.py +606 -0
- autocoder/common/shell_commands/tests/test_timeout_config.py +343 -0
- autocoder/common/shell_commands/tests/test_timeout_manager.py +520 -0
- autocoder/common/shell_commands/timeout_config.py +315 -0
- autocoder/common/shell_commands/timeout_manager.py +352 -0
- autocoder/common/terminal_paste/__init__.py +14 -0
- autocoder/common/terminal_paste/demo.py +145 -0
- autocoder/common/terminal_paste/demo_paste_functionality.py +95 -0
- autocoder/common/terminal_paste/paste_handler.py +200 -0
- autocoder/common/terminal_paste/paste_manager.py +118 -0
- autocoder/common/terminal_paste/tests/__init__.py +1 -0
- autocoder/common/terminal_paste/tests/test_paste_handler.py +182 -0
- autocoder/common/terminal_paste/tests/test_paste_manager.py +126 -0
- autocoder/common/terminal_paste/utils.py +163 -0
- autocoder/common/test_autocoder_args.py +232 -0
- autocoder/common/test_env_manager.py +173 -0
- autocoder/common/test_env_manager_integration.py +159 -0
- autocoder/common/text_similarity/__init__.py +9 -0
- autocoder/common/text_similarity/demo.py +216 -0
- autocoder/common/text_similarity/examples.py +266 -0
- autocoder/common/text_similarity/test_text_similarity.py +306 -0
- autocoder/common/text_similarity/text_similarity.py +194 -0
- autocoder/common/text_similarity/utils.py +125 -0
- autocoder/common/todos/__init__.py +61 -0
- autocoder/common/todos/cache/__init__.py +16 -0
- autocoder/common/todos/cache/base_cache.py +89 -0
- autocoder/common/todos/cache/cache_manager.py +228 -0
- autocoder/common/todos/cache/memory_cache.py +225 -0
- autocoder/common/todos/config.py +155 -0
- autocoder/common/todos/exceptions.py +35 -0
- autocoder/common/todos/get_todo_manager.py +161 -0
- autocoder/common/todos/manager.py +537 -0
- autocoder/common/todos/models.py +239 -0
- autocoder/common/todos/storage/__init__.py +14 -0
- autocoder/common/todos/storage/base_storage.py +76 -0
- autocoder/common/todos/storage/file_storage.py +278 -0
- autocoder/common/tokens/counter.py +24 -2
- autocoder/common/tools_manager/__init__.py +17 -0
- autocoder/common/tools_manager/examples.py +162 -0
- autocoder/common/tools_manager/manager.py +385 -0
- autocoder/common/tools_manager/models.py +39 -0
- autocoder/common/tools_manager/test_tools_manager.py +303 -0
- autocoder/common/tools_manager/utils.py +191 -0
- autocoder/common/v2/agent/agentic_callbacks.py +270 -0
- autocoder/common/v2/agent/agentic_edit.py +2699 -1856
- autocoder/common/v2/agent/agentic_edit_change_manager.py +474 -0
- autocoder/common/v2/agent/agentic_edit_tools/__init__.py +35 -1
- autocoder/common/v2/agent/agentic_edit_tools/ac_mod_list_tool_resolver.py +279 -0
- autocoder/common/v2/agent/agentic_edit_tools/ac_mod_write_tool_resolver.py +10 -1
- autocoder/common/v2/agent/agentic_edit_tools/background_task_tool_resolver.py +1167 -0
- autocoder/common/v2/agent/agentic_edit_tools/base_tool_resolver.py +2 -2
- autocoder/common/v2/agent/agentic_edit_tools/conversation_message_ids_read_tool_resolver.py +214 -0
- autocoder/common/v2/agent/agentic_edit_tools/conversation_message_ids_write_tool_resolver.py +299 -0
- autocoder/common/v2/agent/agentic_edit_tools/count_tokens_tool_resolver.py +290 -0
- autocoder/common/v2/agent/agentic_edit_tools/execute_command_tool_resolver.py +564 -29
- autocoder/common/v2/agent/agentic_edit_tools/execute_workflow_tool_resolver.py +485 -0
- autocoder/common/v2/agent/agentic_edit_tools/extract_to_text_tool_resolver.py +225 -0
- autocoder/common/v2/agent/agentic_edit_tools/lint_report.py +79 -0
- autocoder/common/v2/agent/agentic_edit_tools/linter_config_models.py +343 -0
- autocoder/common/v2/agent/agentic_edit_tools/linter_enabled_tool_resolver.py +189 -0
- autocoder/common/v2/agent/agentic_edit_tools/list_files_tool_resolver.py +169 -101
- autocoder/common/v2/agent/agentic_edit_tools/load_extra_document_tool_resolver.py +349 -0
- autocoder/common/v2/agent/agentic_edit_tools/read_file_tool_resolver.py +243 -50
- autocoder/common/v2/agent/agentic_edit_tools/replace_in_file_tool_resolver.py +667 -147
- autocoder/common/v2/agent/agentic_edit_tools/run_named_subagents_tool_resolver.py +691 -0
- autocoder/common/v2/agent/agentic_edit_tools/search_files_tool_resolver.py +410 -86
- autocoder/common/v2/agent/agentic_edit_tools/session_interactive_tool_resolver.py +115 -0
- autocoder/common/v2/agent/agentic_edit_tools/session_start_tool_resolver.py +190 -0
- autocoder/common/v2/agent/agentic_edit_tools/session_stop_tool_resolver.py +76 -0
- autocoder/common/v2/agent/agentic_edit_tools/test_write_to_file_tool_resolver.py +207 -192
- autocoder/common/v2/agent/agentic_edit_tools/todo_read_tool_resolver.py +80 -63
- autocoder/common/v2/agent/agentic_edit_tools/todo_write_tool_resolver.py +237 -233
- autocoder/common/v2/agent/agentic_edit_tools/use_mcp_tool_resolver.py +2 -2
- autocoder/common/v2/agent/agentic_edit_tools/web_crawl_tool_resolver.py +557 -0
- autocoder/common/v2/agent/agentic_edit_tools/web_search_tool_resolver.py +600 -0
- autocoder/common/v2/agent/agentic_edit_tools/write_to_file_tool_resolver.py +56 -121
- autocoder/common/v2/agent/agentic_edit_types.py +343 -9
- autocoder/common/v2/agent/runner/__init__.py +3 -3
- autocoder/common/v2/agent/runner/base_runner.py +12 -26
- autocoder/common/v2/agent/runner/{event_runner.py → file_based_event_runner.py} +3 -2
- autocoder/common/v2/agent/runner/sdk_runner.py +150 -8
- autocoder/common/v2/agent/runner/terminal_runner.py +170 -57
- autocoder/common/v2/agent/runner/tool_display.py +557 -159
- autocoder/common/v2/agent/test_agentic_callbacks.py +265 -0
- autocoder/common/v2/agent/test_agentic_edit.py +194 -0
- autocoder/common/v2/agent/tool_caller/__init__.py +24 -0
- autocoder/common/v2/agent/tool_caller/default_tool_resolver_map.py +135 -0
- autocoder/common/v2/agent/tool_caller/integration_test.py +172 -0
- autocoder/common/v2/agent/tool_caller/plugins/__init__.py +14 -0
- autocoder/common/v2/agent/tool_caller/plugins/base_plugin.py +126 -0
- autocoder/common/v2/agent/tool_caller/plugins/examples/__init__.py +13 -0
- autocoder/common/v2/agent/tool_caller/plugins/examples/logging_plugin.py +164 -0
- autocoder/common/v2/agent/tool_caller/plugins/examples/security_filter_plugin.py +198 -0
- autocoder/common/v2/agent/tool_caller/plugins/plugin_interface.py +141 -0
- autocoder/common/v2/agent/tool_caller/test_tool_caller.py +278 -0
- autocoder/common/v2/agent/tool_caller/tool_call_plugin_manager.py +331 -0
- autocoder/common/v2/agent/tool_caller/tool_caller.py +337 -0
- autocoder/common/v2/agent/tool_caller/usage_example.py +193 -0
- autocoder/common/v2/code_agentic_editblock_manager.py +4 -4
- autocoder/common/v2/code_auto_generate.py +136 -78
- autocoder/common/v2/code_auto_generate_diff.py +135 -79
- autocoder/common/v2/code_auto_generate_editblock.py +174 -99
- autocoder/common/v2/code_auto_generate_strict_diff.py +151 -71
- autocoder/common/v2/code_auto_merge.py +1 -1
- autocoder/common/v2/code_auto_merge_editblock.py +13 -1
- autocoder/common/v2/code_diff_manager.py +3 -3
- autocoder/common/v2/code_editblock_manager.py +4 -14
- autocoder/common/v2/code_manager.py +1 -1
- autocoder/common/v2/code_strict_diff_manager.py +2 -2
- autocoder/common/wrap_llm_hint/__init__.py +10 -0
- autocoder/common/wrap_llm_hint/test_wrap_llm_hint.py +1067 -0
- autocoder/common/wrap_llm_hint/utils.py +432 -0
- autocoder/common/wrap_llm_hint/wrap_llm_hint.py +323 -0
- autocoder/completer/__init__.py +8 -0
- autocoder/completer/command_completer_v2.py +1051 -0
- autocoder/default_project/__init__.py +501 -0
- autocoder/dispacher/__init__.py +4 -12
- autocoder/dispacher/actions/action.py +165 -7
- autocoder/dispacher/actions/plugins/action_regex_project.py +2 -2
- autocoder/index/entry.py +116 -124
- autocoder/{agent → index/filter}/agentic_filter.py +322 -333
- autocoder/index/filter/normal_filter.py +5 -11
- autocoder/index/filter/quick_filter.py +1 -1
- autocoder/index/index.py +36 -9
- autocoder/index/tests/__init__.py +1 -0
- autocoder/index/tests/run_tests.py +195 -0
- autocoder/index/tests/test_entry.py +303 -0
- autocoder/index/tests/test_index_manager.py +314 -0
- autocoder/index/tests/test_module_integration.py +300 -0
- autocoder/index/tests/test_symbols_utils.py +183 -0
- autocoder/inner/__init__.py +4 -0
- autocoder/inner/agentic.py +932 -0
- autocoder/inner/async_command_handler.py +992 -0
- autocoder/inner/conversation_command_handlers.py +623 -0
- autocoder/inner/merge_command_handler.py +213 -0
- autocoder/inner/queue_command_handler.py +684 -0
- autocoder/models.py +95 -266
- autocoder/plugins/git_helper_plugin.py +31 -29
- autocoder/plugins/token_helper_plugin.py +65 -46
- autocoder/pyproject/__init__.py +32 -29
- autocoder/rag/agentic_rag.py +215 -75
- autocoder/rag/cache/simple_cache.py +1 -2
- autocoder/rag/loaders/image_loader.py +1 -1
- autocoder/rag/long_context_rag.py +42 -26
- autocoder/rag/qa_conversation_strategy.py +1 -1
- autocoder/rag/terminal/__init__.py +17 -0
- autocoder/rag/terminal/args.py +581 -0
- autocoder/rag/terminal/bootstrap.py +61 -0
- autocoder/rag/terminal/command_handlers.py +653 -0
- autocoder/rag/terminal/formatters/__init__.py +20 -0
- autocoder/rag/terminal/formatters/base.py +70 -0
- autocoder/rag/terminal/formatters/json_format.py +66 -0
- autocoder/rag/terminal/formatters/stream_json.py +95 -0
- autocoder/rag/terminal/formatters/text.py +28 -0
- autocoder/rag/terminal/init.py +120 -0
- autocoder/rag/terminal/utils.py +106 -0
- autocoder/rag/test_agentic_rag.py +389 -0
- autocoder/rag/test_doc_filter.py +3 -3
- autocoder/rag/test_long_context_rag.py +1 -1
- autocoder/rag/test_token_limiter.py +517 -10
- autocoder/rag/token_counter.py +3 -0
- autocoder/rag/token_limiter.py +19 -15
- autocoder/rag/tools/__init__.py +26 -2
- autocoder/rag/tools/bochaai_example.py +343 -0
- autocoder/rag/tools/bochaai_sdk.py +541 -0
- autocoder/rag/tools/metaso_example.py +268 -0
- autocoder/rag/tools/metaso_sdk.py +417 -0
- autocoder/rag/tools/recall_tool.py +28 -7
- autocoder/rag/tools/run_integration_tests.py +204 -0
- autocoder/rag/tools/test_all_providers.py +318 -0
- autocoder/rag/tools/test_bochaai_integration.py +482 -0
- autocoder/rag/tools/test_final_integration.py +215 -0
- autocoder/rag/tools/test_metaso_integration.py +424 -0
- autocoder/rag/tools/test_metaso_real.py +171 -0
- autocoder/rag/tools/test_web_crawl_tool.py +639 -0
- autocoder/rag/tools/test_web_search_tool.py +509 -0
- autocoder/rag/tools/todo_read_tool.py +202 -0
- autocoder/rag/tools/todo_write_tool.py +412 -0
- autocoder/rag/tools/web_crawl_tool.py +634 -0
- autocoder/rag/tools/web_search_tool.py +558 -0
- autocoder/rag/tools/web_tools_example.py +119 -0
- autocoder/rag/types.py +16 -0
- autocoder/rag/variable_holder.py +4 -2
- autocoder/rags.py +86 -79
- autocoder/regexproject/__init__.py +23 -21
- autocoder/sdk/__init__.py +46 -190
- autocoder/sdk/api.py +370 -0
- autocoder/sdk/async_runner/__init__.py +26 -0
- autocoder/sdk/async_runner/async_executor.py +650 -0
- autocoder/sdk/async_runner/async_handler.py +356 -0
- autocoder/sdk/async_runner/markdown_processor.py +595 -0
- autocoder/sdk/async_runner/task_metadata.py +284 -0
- autocoder/sdk/async_runner/worktree_manager.py +438 -0
- autocoder/sdk/cli/__init__.py +2 -5
- autocoder/sdk/cli/formatters.py +28 -204
- autocoder/sdk/cli/handlers.py +77 -44
- autocoder/sdk/cli/main.py +154 -171
- autocoder/sdk/cli/options.py +95 -22
- autocoder/sdk/constants.py +139 -51
- autocoder/sdk/core/auto_coder_core.py +484 -109
- autocoder/sdk/core/bridge.py +297 -115
- autocoder/sdk/exceptions.py +18 -12
- autocoder/sdk/formatters/__init__.py +19 -0
- autocoder/sdk/formatters/input.py +64 -0
- autocoder/sdk/formatters/output.py +247 -0
- autocoder/sdk/formatters/stream.py +54 -0
- autocoder/sdk/models/__init__.py +6 -5
- autocoder/sdk/models/options.py +55 -18
- autocoder/sdk/utils/formatters.py +27 -195
- autocoder/suffixproject/__init__.py +28 -25
- autocoder/terminal/__init__.py +14 -0
- autocoder/terminal/app.py +454 -0
- autocoder/terminal/args.py +32 -0
- autocoder/terminal/bootstrap.py +178 -0
- autocoder/terminal/command_processor.py +521 -0
- autocoder/terminal/command_registry.py +57 -0
- autocoder/terminal/help.py +97 -0
- autocoder/terminal/tasks/__init__.py +5 -0
- autocoder/terminal/tasks/background.py +77 -0
- autocoder/terminal/tasks/task_event.py +70 -0
- autocoder/terminal/ui/__init__.py +13 -0
- autocoder/terminal/ui/completer.py +268 -0
- autocoder/terminal/ui/keybindings.py +75 -0
- autocoder/terminal/ui/session.py +41 -0
- autocoder/terminal/ui/toolbar.py +64 -0
- autocoder/terminal/utils/__init__.py +13 -0
- autocoder/terminal/utils/errors.py +18 -0
- autocoder/terminal/utils/paths.py +19 -0
- autocoder/terminal/utils/shell.py +43 -0
- autocoder/terminal_v3/__init__.py +10 -0
- autocoder/terminal_v3/app.py +201 -0
- autocoder/terminal_v3/handlers/__init__.py +5 -0
- autocoder/terminal_v3/handlers/command_handler.py +131 -0
- autocoder/terminal_v3/models/__init__.py +6 -0
- autocoder/terminal_v3/models/conversation_buffer.py +214 -0
- autocoder/terminal_v3/models/message.py +50 -0
- autocoder/terminal_v3/models/tool_display.py +247 -0
- autocoder/terminal_v3/ui/__init__.py +7 -0
- autocoder/terminal_v3/ui/keybindings.py +56 -0
- autocoder/terminal_v3/ui/layout.py +141 -0
- autocoder/terminal_v3/ui/styles.py +43 -0
- autocoder/tsproject/__init__.py +23 -23
- autocoder/utils/auto_coder_utils/chat_stream_out.py +1 -1
- autocoder/utils/llms.py +88 -80
- autocoder/utils/math_utils.py +101 -0
- autocoder/utils/model_provider_selector.py +16 -4
- autocoder/utils/operate_config_api.py +33 -5
- autocoder/utils/thread_utils.py +2 -2
- autocoder/version.py +4 -2
- autocoder/workflow_agents/__init__.py +84 -0
- autocoder/workflow_agents/agent.py +143 -0
- autocoder/workflow_agents/exceptions.py +573 -0
- autocoder/workflow_agents/executor.py +489 -0
- autocoder/workflow_agents/loader.py +737 -0
- autocoder/workflow_agents/runner.py +267 -0
- autocoder/workflow_agents/types.py +172 -0
- autocoder/workflow_agents/utils.py +434 -0
- autocoder/workflow_agents/workflow_manager.py +211 -0
- auto_coder-1.0.0.dist-info/METADATA +0 -396
- auto_coder-1.0.0.dist-info/RECORD +0 -442
- auto_coder-1.0.0.dist-info/licenses/LICENSE +0 -201
- autocoder/auto_coder_server.py +0 -672
- autocoder/benchmark.py +0 -138
- autocoder/common/ac_style_command_parser/example.py +0 -7
- autocoder/common/cleaner.py +0 -31
- autocoder/common/command_completer_v2.py +0 -615
- autocoder/common/context_pruner.py +0 -477
- autocoder/common/conversation_pruner.py +0 -132
- autocoder/common/directory_cache/__init__.py +0 -1
- autocoder/common/directory_cache/cache.py +0 -192
- autocoder/common/directory_cache/test_cache.py +0 -190
- autocoder/common/file_checkpoint/examples.py +0 -217
- autocoder/common/llm_friendly_package_example.py +0 -138
- autocoder/common/llm_friendly_package_test.py +0 -63
- autocoder/common/pull_requests/test_module.py +0 -1
- autocoder/common/rulefiles/autocoderrules_utils.py +0 -484
- autocoder/common/text.py +0 -30
- autocoder/common/v2/agent/agentic_edit_tools/list_package_info_tool_resolver.py +0 -42
- autocoder/common/v2/agent/agentic_edit_tools/test_execute_command_tool_resolver.py +0 -70
- autocoder/common/v2/agent/agentic_edit_tools/test_search_files_tool_resolver.py +0 -163
- autocoder/common/v2/agent/agentic_tool_display.py +0 -183
- autocoder/plugins/dynamic_completion_example.py +0 -148
- autocoder/plugins/sample_plugin.py +0 -160
- autocoder/sdk/cli/__main__.py +0 -26
- autocoder/sdk/cli/completion_wrapper.py +0 -38
- autocoder/sdk/cli/install_completion.py +0 -301
- autocoder/sdk/models/messages.py +0 -209
- autocoder/sdk/session/__init__.py +0 -32
- autocoder/sdk/session/session.py +0 -106
- autocoder/sdk/session/session_manager.py +0 -56
- {auto_coder-1.0.0.dist-info → auto_coder-2.0.0.dist-info}/top_level.txt +0 -0
- /autocoder/{sdk/example.py → common/agent_query_queue/__init__.py} +0 -0
|
@@ -0,0 +1,541 @@
|
|
|
1
|
+
|
|
2
|
+
"""
|
|
3
|
+
BochaAI SDK 封装
|
|
4
|
+
|
|
5
|
+
该模块提供了 BochaAI API 的 Python SDK 封装,支持网页搜索和内容爬取功能。
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import time
|
|
10
|
+
import requests
|
|
11
|
+
from typing import Dict, Any, List, Optional, Union
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from loguru import logger
|
|
14
|
+
from urllib.parse import urlparse, urljoin
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class BochaAIWebPage:
    """A single web-page hit from a BochaAI search result."""

    # Fields without defaults: must be supplied by the caller.
    name: str
    url: str
    snippet: str
    # Optional fields: None means "not provided".
    display_url: Optional[str] = None
    summary: Optional[str] = None
    site_name: Optional[str] = None
    site_icon: Optional[str] = None
    date_published: Optional[str] = None
    date_last_crawled: Optional[str] = None
    cached_page_url: Optional[str] = None
    language: Optional[str] = None
    is_family_friendly: Optional[bool] = None
    is_navigational: Optional[bool] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the page as a plain dict.

        ``name``, ``url`` and ``snippet`` are always present. Each optional
        field is appended, in declaration order, only when it is not ``None``.
        """
        payload: Dict[str, Any] = {
            "name": self.name,
            "url": self.url,
            "snippet": self.snippet,
        }

        optional_names = (
            "display_url",
            "summary",
            "site_name",
            "site_icon",
            "date_published",
            "date_last_crawled",
            "cached_page_url",
            "language",
            "is_family_friendly",
            "is_navigational",
        )
        for attr in optional_names:
            current = getattr(self, attr)
            if current is None:
                continue
            payload[attr] = current

        return payload
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class BochaAIImage:
    """A single image hit from a BochaAI search result."""

    # Fields without defaults: must be supplied by the caller.
    thumbnail_url: str
    content_url: str
    host_page_url: str
    width: int
    height: int
    # Optional fields: None means "not provided".
    name: Optional[str] = None
    web_search_url: Optional[str] = None
    date_published: Optional[str] = None
    content_size: Optional[str] = None
    encoding_format: Optional[str] = None
    host_page_display_url: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return every instance attribute whose value is not ``None``."""
        populated: Dict[str, Any] = {}
        for attr, current in self.__dict__.items():
            if current is None:
                continue
            populated[attr] = current
        return populated
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclass
class BochaAISearchResponse:
    """Outcome of a BochaAI search call, successful or not."""

    success: bool
    query: str
    total_matches: int = 0
    webpages: List[BochaAIWebPage] = field(default_factory=list)
    images: List[BochaAIImage] = field(default_factory=list)
    error: Optional[str] = None
    log_id: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the response as a plain dict.

        Nested web pages and images are converted through their own
        ``to_dict`` methods. ``error`` and ``log_id`` are always included,
        even when they are ``None``.
        """
        serialized_pages = [entry.to_dict() for entry in self.webpages]
        serialized_images = [entry.to_dict() for entry in self.images]

        payload: Dict[str, Any] = {}
        payload["success"] = self.success
        payload["query"] = self.query
        payload["total_matches"] = self.total_matches
        payload["webpages"] = serialized_pages
        payload["images"] = serialized_images
        payload["error"] = self.error
        payload["log_id"] = self.log_id
        return payload
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class BochaAIClient:
    """Client for the BochaAI HTTP API.

    Wraps the ``web-search`` endpoint and adds simple page fetching built on
    plain ``requests`` calls. The client can be used as a context manager;
    leaving the ``with`` block closes the underlying HTTP session.
    """

    BASE_URL = "https://api.bochaai.com/v1"
    DEFAULT_TIMEOUT = 30  # default request timeout, in seconds

    def __init__(self, api_key: str, timeout: int = DEFAULT_TIMEOUT):
        """Create a client.

        Args:
            api_key: BochaAI API key, sent as a Bearer token on API requests.
            timeout: Request timeout in seconds, used for every HTTP call.
        """
        self.api_key = api_key
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        })

    @staticmethod
    def _describe_http_error(response) -> tuple:
        """Build ``(error_message, log_id)`` for a non-200 API response.

        When the body is a JSON object, its ``message`` (if any) is appended
        to the message and its ``log_id`` is returned (``""`` when absent).
        For any other body the raw response text is appended instead and the
        log id is ``""``.
        """
        error_msg = f"API 请求失败,状态码: {response.status_code}"
        try:
            error_data = response.json()
        except ValueError:
            # Body is not valid JSON; JSON decode errors derive from ValueError.
            error_data = None

        if not isinstance(error_data, dict):
            return f"{error_msg}, 响应内容: {response.text}", ""

        if "message" in error_data:
            error_msg = f"{error_msg}, 错误信息: {error_data['message']}"
        return error_msg, error_data.get("log_id", "")

    @staticmethod
    def _parse_webpage(item: Dict[str, Any]) -> BochaAIWebPage:
        """Convert one ``webPages.value`` entry of the API response."""
        return BochaAIWebPage(
            name=item.get("name", ""),
            url=item.get("url", ""),
            snippet=item.get("snippet", ""),
            display_url=item.get("displayUrl"),
            summary=item.get("summary"),
            site_name=item.get("siteName"),
            site_icon=item.get("siteIcon"),
            date_published=item.get("datePublished"),
            date_last_crawled=item.get("dateLastCrawled"),
            cached_page_url=item.get("cachedPageUrl"),
            language=item.get("language"),
            is_family_friendly=item.get("isFamilyFriendly"),
            is_navigational=item.get("isNavigational")
        )

    @staticmethod
    def _parse_image(item: Dict[str, Any]) -> BochaAIImage:
        """Convert one ``images.value`` entry of the API response."""
        return BochaAIImage(
            thumbnail_url=item.get("thumbnailUrl", ""),
            content_url=item.get("contentUrl", ""),
            host_page_url=item.get("hostPageUrl", ""),
            width=item.get("width", 0),
            height=item.get("height", 0),
            name=item.get("name"),
            web_search_url=item.get("webSearchUrl"),
            date_published=item.get("datePublished"),
            content_size=item.get("contentSize"),
            encoding_format=item.get("encodingFormat"),
            host_page_display_url=item.get("hostPageDisplayUrl")
        )

    def search(
        self,
        query: str,
        count: int = 10,
        freshness: str = "noLimit",
        summary: bool = False,
        include: Optional[str] = None,
        exclude: Optional[str] = None
    ) -> BochaAISearchResponse:
        """Run a web search.

        This method does not raise on request or parsing failures; they are
        reported through the returned object (``success=False`` and
        ``error`` set).

        Args:
            query: Search query string.
            count: Number of results to request; clamped to the range 1-50.
            freshness: Time-range filter (noLimit, oneDay, oneWeek, oneMonth,
                oneYear).
            summary: Whether to request text summaries.
            include: Sites to restrict the search to; multiple domains
                separated by ``|`` or ``,``. Omitted from the request when
                empty.
            exclude: Sites to exclude from the search; multiple domains
                separated by ``|`` or ``,``. Omitted from the request when
                empty.

        Returns:
            BochaAISearchResponse: The parsed search result or error.
        """
        try:
            payload = {
                "query": query,
                "count": min(max(count, 1), 50),  # clamp to 1-50
                "freshness": freshness,
                "summary": summary
            }
            if include:
                payload["include"] = include
            if exclude:
                payload["exclude"] = exclude

            logger.info(f"BochaAI 搜索请求: query={query}, count={count}, summary={summary}")

            response = self.session.post(
                f"{self.BASE_URL}/web-search",
                json=payload,
                timeout=self.timeout
            )

            # HTTP-level failure.
            if response.status_code != 200:
                error_msg, log_id = self._describe_http_error(response)
                logger.error(error_msg)
                return BochaAISearchResponse(
                    success=False,
                    query=query,
                    error=error_msg,
                    log_id=log_id
                )

            result = response.json()

            # API-level failure reported inside a 200 response.
            if result.get("code") != 200:
                error_msg = result.get("msg", "未知错误")
                logger.error(error_msg)
                return BochaAISearchResponse(
                    success=False,
                    query=query,
                    error=error_msg,
                    log_id=result.get("log_id")
                )

            # ``or {}`` / ``or []``: treat explicit JSON nulls like missing
            # keys so a null section does not fail the whole search.
            body = result.get("data") or {}
            web_pages_data = body.get("webPages") or {}
            images_data = body.get("images") or {}

            webpages = [
                self._parse_webpage(item)
                for item in web_pages_data.get("value") or []
            ]
            images = [
                self._parse_image(item)
                for item in images_data.get("value") or []
            ]

            return BochaAISearchResponse(
                success=True,
                query=query,
                total_matches=web_pages_data.get("totalEstimatedMatches", 0),
                webpages=webpages,
                images=images,
                log_id=result.get("log_id")
            )

        except requests.exceptions.Timeout:
            error_msg = f"搜索请求超时({self.timeout}秒)"
            logger.error(error_msg)
            return BochaAISearchResponse(
                success=False,
                query=query,
                error=error_msg
            )
        except requests.exceptions.RequestException as e:
            error_msg = f"网络请求失败: {str(e)}"
            logger.error(error_msg)
            return BochaAISearchResponse(
                success=False,
                query=query,
                error=error_msg
            )
        except Exception as e:
            # Last-resort boundary: report instead of raising to the caller.
            error_msg = f"搜索失败: {str(e)}"
            logger.error(error_msg)
            return BochaAISearchResponse(
                success=False,
                query=query,
                error=error_msg
            )

    def crawl_page(
        self,
        url: str,
        headers: Optional[Dict[str, str]] = None
    ) -> Dict[str, Any]:
        """Fetch a single page with a plain HTTP GET.

        This method does not raise on request failures; they are reported in
        the returned dict.

        Args:
            url: URL of the page to fetch.
            headers: Extra request headers; they override the default
                ``User-Agent`` on key collisions.

        Returns:
            Dict: On success, ``success=True`` plus ``url``, ``title`` (text
            of the first ``<title>`` element, or ``""``), ``content`` (the
            response text), ``content_type``, ``content_length`` (length of
            the decoded text) and ``status_code``. On failure,
            ``success=False`` plus ``url`` and ``error``; ``status_code`` is
            included when a non-200 response was received.
        """
        import re

        try:
            logger.info(f"BochaAI 爬取页面: {url}")

            request_headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
            }
            if headers:
                request_headers.update(headers)

            # Module-level requests.get rather than self.session, so the API
            # Authorization header set in __init__ is not sent to the target.
            response = requests.get(
                url,
                headers=request_headers,
                timeout=self.timeout,
                allow_redirects=True
            )

            if response.status_code != 200:
                error_msg = f"页面请求失败,状态码: {response.status_code}"
                logger.error(error_msg)
                return {
                    "success": False,
                    "url": url,
                    "error": error_msg,
                    "status_code": response.status_code
                }

            content = response.text

            # Best-effort title extraction from the raw markup.
            title = ""
            title_match = re.search(r'<title[^>]*>([^<]+)</title>', content, re.IGNORECASE)
            if title_match:
                title = title_match.group(1).strip()

            return {
                "success": True,
                "url": url,
                "title": title,
                "content": content,
                "content_type": response.headers.get("Content-Type", ""),
                "content_length": len(content),
                "status_code": response.status_code
            }

        except requests.exceptions.Timeout:
            error_msg = f"页面请求超时({self.timeout}秒)"
            logger.error(error_msg)
            return {
                "success": False,
                "url": url,
                "error": error_msg
            }
        except requests.exceptions.RequestException as e:
            error_msg = f"网络请求失败: {str(e)}"
            logger.error(error_msg)
            return {
                "success": False,
                "url": url,
                "error": error_msg
            }
        except Exception as e:
            # Last-resort boundary: report instead of raising to the caller.
            error_msg = f"爬取失败: {str(e)}"
            logger.error(error_msg)
            return {
                "success": False,
                "url": url,
                "error": error_msg
            }

    @staticmethod
    def _should_crawl(
        candidate_url: str,
        domain: str,
        exclude_paths: Optional[List[str]],
        include_paths: Optional[List[str]],
        allow_subdomains: bool
    ) -> bool:
        """Decide whether a search-result URL passes the crawl filters.

        Path filters are plain substring checks against the whole URL. When
        ``allow_subdomains`` is false the URL's host must equal ``domain``
        exactly; when it is true no host check is applied.
        """
        if exclude_paths and any(part in candidate_url for part in exclude_paths):
            return False
        if include_paths and not any(part in candidate_url for part in include_paths):
            return False
        if not allow_subdomains and urlparse(candidate_url).netloc != domain:
            return False
        return True

    @staticmethod
    def _format_crawl_result(
        page_url: str,
        fetched: Dict[str, Any],
        page_info: Optional[BochaAIWebPage] = None,
        position: Optional[int] = None
    ) -> Dict[str, Any]:
        """Build one crawl-result dict from a successful ``crawl_page`` result.

        ``page_info`` is the matching search hit, if any; ``position`` is the
        1-based crawl position and is left out of the metadata when ``None``.
        """
        metadata: Dict[str, Any] = {}
        if position is not None:
            metadata["position"] = position
        metadata["source"] = "bochaai"
        metadata["content_type"] = fetched.get("content_type", "")
        metadata["status_code"] = fetched.get("status_code", 200)

        title = fetched.get("title", "")
        if page_info is not None:
            # Prefer the search hit's name, but fall back to the fetched
            # <title> when the hit carries an empty name.
            title = page_info.name or title
            if page_info.date_published:
                metadata["date_published"] = page_info.date_published
            if page_info.site_name:
                metadata["site_name"] = page_info.site_name

        content = fetched.get("content", "")
        return {
            "url": page_url,
            "title": title,
            "content": content,
            "markdown": content,  # raw content; no markdown conversion is done
            "links": [],
            "metadata": metadata
        }

    def _crawl_single(self, url: str) -> List[Dict[str, Any]]:
        """Fetch only ``url``; return a one-item result list, or ``[]`` on failure."""
        fetched = self.crawl_page(url)
        if fetched.get("success"):
            return [self._format_crawl_result(url, fetched)]
        return []

    def crawl(
        self,
        url: str,
        limit: int = 10,
        max_depth: Optional[int] = None,
        exclude_paths: Optional[List[str]] = None,
        include_paths: Optional[List[str]] = None,
        allow_subdomains: bool = False
    ) -> List[Dict[str, Any]]:
        """Crawl a site by combining a ``site:`` search with page fetches.

        The start URL is always fetched first and is not subject to the path
        or host filters. Further URLs come from a search for
        ``site:<host>`` plus the start URL's path segments as keywords.

        Args:
            url: Start URL.
            limit: Maximum number of pages to fetch. Values of 0 or less
                yield an empty list; 1 fetches only the start URL without
                searching.
            max_depth: Maximum crawl depth (not implemented; ignored).
            exclude_paths: Substrings; search results whose URL contains any
                of them are skipped.
            include_paths: Substrings; when given, search results must
                contain at least one of them.
            allow_subdomains: When false, search results must have exactly
                the start URL's host. When true, no host check is applied.

        Returns:
            List[Dict]: One dict per successfully fetched page with keys
            ``url``, ``title``, ``content``, ``markdown``, ``links`` and
            ``metadata``. Empty on failure.
        """
        try:
            # A negative limit would otherwise slice URLs off the END of the
            # candidate list instead of crawling nothing.
            if limit <= 0:
                return []

            parsed_url = urlparse(url)
            domain = parsed_url.netloc

            # Single page requested: skip the search entirely.
            if limit == 1:
                return self._crawl_single(url)

            search_query = f"site:{domain}"
            if parsed_url.path and parsed_url.path != "/":
                # Turn the path segments into extra search keywords.
                path_keywords = parsed_url.path.strip("/").replace("/", " ")
                search_query += f" {path_keywords}"

            logger.info(f"开始爬取: {url}, 限制: {limit} 页")

            search_result = self.search(
                query=search_query,
                count=min(limit * 2, 50),  # over-fetch so filtering has headroom
                summary=False
            )

            if not search_result.success:
                logger.error(f"爬取失败: {search_result.error}")
                # Search failed: still try the start page itself.
                return self._crawl_single(url)

            # First search hit per URL, for title/metadata enrichment.
            info_by_url: Dict[str, BochaAIWebPage] = {}
            for webpage in search_result.webpages:
                info_by_url.setdefault(webpage.url, webpage)

            urls_to_crawl = [url]
            queued = {url}
            for webpage in search_result.webpages:
                candidate = webpage.url
                if not candidate or candidate in queued:
                    continue
                if self._should_crawl(
                    candidate, domain, exclude_paths, include_paths, allow_subdomains
                ):
                    queued.add(candidate)
                    urls_to_crawl.append(candidate)

            urls_to_crawl = urls_to_crawl[:limit]

            crawled_pages = []
            for idx, page_url in enumerate(urls_to_crawl):
                logger.info(f"爬取页面 {idx + 1}/{len(urls_to_crawl)}: {page_url}")

                fetched = self.crawl_page(page_url)
                if fetched.get("success"):
                    crawled_pages.append(
                        self._format_crawl_result(
                            page_url,
                            fetched,
                            page_info=info_by_url.get(page_url),
                            position=idx + 1
                        )
                    )

                # Short pause between requests to avoid hammering the site.
                if idx < len(urls_to_crawl) - 1:
                    time.sleep(0.5)

            return crawled_pages

        except Exception as e:
            # Last-resort boundary: report instead of raising to the caller.
            logger.error(f"爬取失败: {str(e)}")
            return []

    def __enter__(self):
        """Enter the context manager and return the client itself."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave the context manager and close the HTTP session."""
        self.session.close()
|
|
541
|
+
|