auto-coder 1.0.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic. Click here for more details.
- auto_coder-2.0.0.dist-info/LICENSE +158 -0
- auto_coder-2.0.0.dist-info/METADATA +558 -0
- auto_coder-2.0.0.dist-info/RECORD +795 -0
- {auto_coder-1.0.0.dist-info → auto_coder-2.0.0.dist-info}/WHEEL +1 -1
- {auto_coder-1.0.0.dist-info → auto_coder-2.0.0.dist-info}/entry_points.txt +3 -3
- autocoder/__init__.py +31 -0
- autocoder/agent/auto_filegroup.py +32 -13
- autocoder/agent/auto_learn_from_commit.py +9 -1
- autocoder/agent/base_agentic/__init__.py +3 -0
- autocoder/agent/base_agentic/agent_hub.py +1 -1
- autocoder/agent/base_agentic/base_agent.py +235 -136
- autocoder/agent/base_agentic/default_tools.py +119 -118
- autocoder/agent/base_agentic/test_base_agent.py +1 -1
- autocoder/agent/base_agentic/tool_registry.py +32 -20
- autocoder/agent/base_agentic/tools/read_file_tool_resolver.py +24 -3
- autocoder/agent/base_agentic/tools/write_to_file_tool_resolver.py +24 -11
- autocoder/agent/base_agentic/types.py +42 -0
- autocoder/agent/entry_command_agent/chat.py +73 -59
- autocoder/auto_coder.py +31 -40
- autocoder/auto_coder_rag.py +11 -1084
- autocoder/auto_coder_runner.py +970 -2345
- autocoder/auto_coder_terminal.py +26 -0
- autocoder/auto_coder_terminal_v3.py +190 -0
- autocoder/chat/conf_command.py +224 -124
- autocoder/chat/models_command.py +361 -299
- autocoder/chat/rules_command.py +79 -31
- autocoder/chat_auto_coder.py +988 -398
- autocoder/chat_auto_coder_lang.py +23 -732
- autocoder/commands/auto_command.py +25 -8
- autocoder/commands/auto_web.py +1 -1
- autocoder/commands/tools.py +44 -44
- autocoder/common/__init__.py +150 -128
- autocoder/common/ac_style_command_parser/__init__.py +39 -2
- autocoder/common/ac_style_command_parser/config.py +422 -0
- autocoder/common/ac_style_command_parser/parser.py +292 -78
- autocoder/common/ac_style_command_parser/test_parser.py +241 -16
- autocoder/common/ac_style_command_parser/test_typed_parser.py +342 -0
- autocoder/common/ac_style_command_parser/typed_parser.py +653 -0
- autocoder/common/action_yml_file_manager.py +25 -13
- autocoder/common/agent_events/__init__.py +52 -0
- autocoder/common/agent_events/agent_event_emitter.py +193 -0
- autocoder/common/agent_events/event_factory.py +177 -0
- autocoder/common/agent_events/examples.py +307 -0
- autocoder/common/agent_events/types.py +113 -0
- autocoder/common/agent_events/utils.py +68 -0
- autocoder/common/agent_hooks/__init__.py +44 -0
- autocoder/common/agent_hooks/examples.py +582 -0
- autocoder/common/agent_hooks/hook_executor.py +217 -0
- autocoder/common/agent_hooks/hook_manager.py +288 -0
- autocoder/common/agent_hooks/types.py +133 -0
- autocoder/common/agent_hooks/utils.py +99 -0
- autocoder/common/agent_query_queue/queue_executor.py +324 -0
- autocoder/common/agent_query_queue/queue_manager.py +325 -0
- autocoder/common/agents/__init__.py +11 -0
- autocoder/common/agents/agent_manager.py +323 -0
- autocoder/common/agents/agent_parser.py +189 -0
- autocoder/common/agents/example_usage.py +344 -0
- autocoder/common/agents/integration_example.py +330 -0
- autocoder/common/agents/test_agent_parser.py +545 -0
- autocoder/common/async_utils.py +101 -0
- autocoder/common/auto_coder_lang.py +23 -972
- autocoder/common/autocoderargs_parser/__init__.py +14 -0
- autocoder/common/autocoderargs_parser/parser.py +184 -0
- autocoder/common/autocoderargs_parser/tests/__init__.py +1 -0
- autocoder/common/autocoderargs_parser/tests/test_args_parser.py +235 -0
- autocoder/common/autocoderargs_parser/tests/test_token_parser.py +195 -0
- autocoder/common/autocoderargs_parser/token_parser.py +290 -0
- autocoder/common/buildin_tokenizer.py +2 -4
- autocoder/common/code_auto_generate.py +149 -74
- autocoder/common/code_auto_generate_diff.py +163 -70
- autocoder/common/code_auto_generate_editblock.py +179 -89
- autocoder/common/code_auto_generate_strict_diff.py +167 -72
- autocoder/common/code_auto_merge_editblock.py +13 -6
- autocoder/common/code_modification_ranker.py +1 -1
- autocoder/common/command_completer.py +3 -3
- autocoder/common/command_file_manager/manager.py +183 -47
- autocoder/common/command_file_manager/test_command_file_manager.py +507 -0
- autocoder/common/command_templates.py +1 -1
- autocoder/common/conf_utils.py +2 -4
- autocoder/common/conversations/config.py +11 -3
- autocoder/common/conversations/get_conversation_manager.py +100 -2
- autocoder/common/conversations/llm_stats_models.py +264 -0
- autocoder/common/conversations/manager.py +112 -28
- autocoder/common/conversations/models.py +16 -2
- autocoder/common/conversations/storage/index_manager.py +134 -10
- autocoder/common/core_config/__init__.py +63 -0
- autocoder/common/core_config/agentic_mode_manager.py +109 -0
- autocoder/common/core_config/base_manager.py +123 -0
- autocoder/common/core_config/compatibility.py +151 -0
- autocoder/common/core_config/config_manager.py +156 -0
- autocoder/common/core_config/conversation_manager.py +31 -0
- autocoder/common/core_config/exclude_manager.py +72 -0
- autocoder/common/core_config/file_manager.py +177 -0
- autocoder/common/core_config/human_as_model_manager.py +129 -0
- autocoder/common/core_config/lib_manager.py +54 -0
- autocoder/common/core_config/main_manager.py +81 -0
- autocoder/common/core_config/mode_manager.py +126 -0
- autocoder/common/core_config/models.py +70 -0
- autocoder/common/core_config/test_memory_manager.py +1056 -0
- autocoder/common/env_manager.py +282 -0
- autocoder/common/env_manager_usage_example.py +211 -0
- autocoder/common/file_checkpoint/conversation_checkpoint.py +19 -19
- autocoder/common/file_checkpoint/manager.py +264 -48
- autocoder/common/file_checkpoint/test_backup.py +1 -18
- autocoder/common/file_checkpoint/test_manager.py +270 -1
- autocoder/common/file_checkpoint/test_store.py +1 -17
- autocoder/common/file_handler/__init__.py +23 -0
- autocoder/common/file_handler/active_context_handler.py +159 -0
- autocoder/common/file_handler/add_files_handler.py +409 -0
- autocoder/common/file_handler/chat_handler.py +180 -0
- autocoder/common/file_handler/coding_handler.py +401 -0
- autocoder/common/file_handler/commit_handler.py +200 -0
- autocoder/common/file_handler/lib_handler.py +156 -0
- autocoder/common/file_handler/list_files_handler.py +111 -0
- autocoder/common/file_handler/mcp_handler.py +268 -0
- autocoder/common/file_handler/models_handler.py +493 -0
- autocoder/common/file_handler/remove_files_handler.py +172 -0
- autocoder/common/git_utils.py +44 -8
- autocoder/common/global_cancel.py +15 -6
- autocoder/common/ignorefiles/test_ignore_file_utils.py +1 -1
- autocoder/common/international/__init__.py +31 -0
- autocoder/common/international/demo_international.py +92 -0
- autocoder/common/international/message_manager.py +157 -0
- autocoder/common/international/messages/__init__.py +56 -0
- autocoder/common/international/messages/async_command_messages.py +507 -0
- autocoder/common/international/messages/auto_coder_messages.py +2208 -0
- autocoder/common/international/messages/chat_auto_coder_messages.py +1547 -0
- autocoder/common/international/messages/command_help_messages.py +986 -0
- autocoder/common/international/messages/conversation_command_messages.py +191 -0
- autocoder/common/international/messages/git_helper_plugin_messages.py +159 -0
- autocoder/common/international/messages/queue_command_messages.py +751 -0
- autocoder/common/international/messages/rules_command_messages.py +77 -0
- autocoder/common/international/messages/sdk_messages.py +1707 -0
- autocoder/common/international/messages/token_helper_plugin_messages.py +361 -0
- autocoder/common/international/messages/tool_display_messages.py +1212 -0
- autocoder/common/international/messages/workflow_exception_messages.py +473 -0
- autocoder/common/international/test_international.py +612 -0
- autocoder/common/linter_core/__init__.py +28 -0
- autocoder/common/linter_core/base_linter.py +61 -0
- autocoder/common/linter_core/config_loader.py +271 -0
- autocoder/common/linter_core/formatters/__init__.py +0 -0
- autocoder/common/linter_core/formatters/base_formatter.py +38 -0
- autocoder/common/linter_core/formatters/raw_formatter.py +17 -0
- autocoder/common/linter_core/linter.py +166 -0
- autocoder/common/linter_core/linter_factory.py +216 -0
- autocoder/common/linter_core/linter_manager.py +333 -0
- autocoder/common/linter_core/linters/__init__.py +9 -0
- autocoder/common/linter_core/linters/java_linter.py +342 -0
- autocoder/common/linter_core/linters/python_linter.py +115 -0
- autocoder/common/linter_core/linters/typescript_linter.py +119 -0
- autocoder/common/linter_core/models/__init__.py +7 -0
- autocoder/common/linter_core/models/lint_result.py +91 -0
- autocoder/common/linter_core/models.py +33 -0
- autocoder/common/linter_core/tests/__init__.py +3 -0
- autocoder/common/linter_core/tests/test_config_loader.py +323 -0
- autocoder/common/linter_core/tests/test_config_loading.py +308 -0
- autocoder/common/linter_core/tests/test_factory_manager.py +234 -0
- autocoder/common/linter_core/tests/test_formatters.py +147 -0
- autocoder/common/linter_core/tests/test_integration.py +317 -0
- autocoder/common/linter_core/tests/test_java_linter.py +496 -0
- autocoder/common/linter_core/tests/test_linters.py +265 -0
- autocoder/common/linter_core/tests/test_models.py +81 -0
- autocoder/common/linter_core/tests/verify_config_loading.py +296 -0
- autocoder/common/linter_core/tests/verify_fixes.py +183 -0
- autocoder/common/llm_friendly_package/__init__.py +31 -0
- autocoder/common/llm_friendly_package/base_manager.py +102 -0
- autocoder/common/llm_friendly_package/docs_manager.py +121 -0
- autocoder/common/llm_friendly_package/library_manager.py +171 -0
- autocoder/common/{llm_friendly_package.py → llm_friendly_package/main_manager.py} +204 -231
- autocoder/common/llm_friendly_package/models.py +40 -0
- autocoder/common/llm_friendly_package/test_llm_friendly_package.py +536 -0
- autocoder/common/llms/__init__.py +15 -0
- autocoder/common/llms/demo_error_handling.py +85 -0
- autocoder/common/llms/factory.py +142 -0
- autocoder/common/llms/manager.py +264 -0
- autocoder/common/llms/pricing.py +121 -0
- autocoder/common/llms/registry.py +288 -0
- autocoder/common/llms/schema.py +77 -0
- autocoder/common/llms/simple_demo.py +45 -0
- autocoder/common/llms/test_quick_model.py +116 -0
- autocoder/common/llms/test_remove_functionality.py +182 -0
- autocoder/common/llms/tests/__init__.py +1 -0
- autocoder/common/llms/tests/test_manager.py +330 -0
- autocoder/common/llms/tests/test_registry.py +364 -0
- autocoder/common/mcp_tools/__init__.py +62 -0
- autocoder/common/{mcp_tools.py → mcp_tools/executor.py} +49 -40
- autocoder/common/{mcp_hub.py → mcp_tools/hub.py} +42 -68
- autocoder/common/{mcp_server_install.py → mcp_tools/installer.py} +16 -28
- autocoder/common/{mcp_server.py → mcp_tools/server.py} +176 -48
- autocoder/common/mcp_tools/test_keyboard_interrupt.py +93 -0
- autocoder/common/mcp_tools/test_mcp_tools.py +391 -0
- autocoder/common/{mcp_server_types.py → mcp_tools/types.py} +121 -48
- autocoder/common/mcp_tools/verify_functionality.py +202 -0
- autocoder/common/model_speed_tester.py +32 -26
- autocoder/common/priority_directory_finder/__init__.py +142 -0
- autocoder/common/priority_directory_finder/examples.py +230 -0
- autocoder/common/priority_directory_finder/finder.py +283 -0
- autocoder/common/priority_directory_finder/models.py +236 -0
- autocoder/common/priority_directory_finder/test_priority_directory_finder.py +431 -0
- autocoder/common/project_scanner/__init__.py +18 -0
- autocoder/common/project_scanner/compat.py +77 -0
- autocoder/common/project_scanner/scanner.py +436 -0
- autocoder/common/project_tracker/__init__.py +27 -0
- autocoder/common/project_tracker/api.py +228 -0
- autocoder/common/project_tracker/demo.py +272 -0
- autocoder/common/project_tracker/tracker.py +487 -0
- autocoder/common/project_tracker/types.py +53 -0
- autocoder/common/pruner/__init__.py +67 -0
- autocoder/common/pruner/agentic_conversation_pruner.py +651 -102
- autocoder/common/pruner/conversation_message_ids_api.py +386 -0
- autocoder/common/pruner/conversation_message_ids_manager.py +347 -0
- autocoder/common/pruner/conversation_message_ids_pruner.py +473 -0
- autocoder/common/pruner/conversation_normalizer.py +347 -0
- autocoder/common/pruner/conversation_pruner.py +26 -6
- autocoder/common/pruner/test_agentic_conversation_pruner.py +554 -112
- autocoder/common/pruner/test_conversation_normalizer.py +502 -0
- autocoder/common/pruner/test_tool_content_detector.py +324 -0
- autocoder/common/pruner/tool_content_detector.py +227 -0
- autocoder/common/pruner/tools/__init__.py +18 -0
- autocoder/common/pruner/tools/query_message_ids.py +264 -0
- autocoder/common/pruner/tools/test_agentic_pruning_logic.py +432 -0
- autocoder/common/pruner/tools/test_message_ids_pruning_only.py +192 -0
- autocoder/common/pull_requests/__init__.py +9 -1
- autocoder/common/pull_requests/utils.py +122 -1
- autocoder/common/rag_manager/rag_manager.py +36 -40
- autocoder/common/rulefiles/__init__.py +53 -1
- autocoder/common/rulefiles/api.py +250 -0
- autocoder/common/rulefiles/core/__init__.py +14 -0
- autocoder/common/rulefiles/core/manager.py +241 -0
- autocoder/common/rulefiles/core/selector.py +805 -0
- autocoder/common/rulefiles/models/__init__.py +20 -0
- autocoder/common/rulefiles/models/index.py +16 -0
- autocoder/common/rulefiles/models/init_rule.py +18 -0
- autocoder/common/rulefiles/models/rule_file.py +18 -0
- autocoder/common/rulefiles/models/rule_relevance.py +14 -0
- autocoder/common/rulefiles/models/summary.py +16 -0
- autocoder/common/rulefiles/test_rulefiles.py +776 -0
- autocoder/common/rulefiles/utils/__init__.py +34 -0
- autocoder/common/rulefiles/utils/monitor.py +86 -0
- autocoder/common/rulefiles/utils/parser.py +230 -0
- autocoder/common/save_formatted_log.py +67 -10
- autocoder/common/search_replace.py +8 -1
- autocoder/common/search_replace_patch/__init__.py +24 -0
- autocoder/common/search_replace_patch/base.py +115 -0
- autocoder/common/search_replace_patch/manager.py +248 -0
- autocoder/common/search_replace_patch/patch_replacer.py +304 -0
- autocoder/common/search_replace_patch/similarity_replacer.py +306 -0
- autocoder/common/search_replace_patch/string_replacer.py +181 -0
- autocoder/common/search_replace_patch/tests/__init__.py +3 -0
- autocoder/common/search_replace_patch/tests/run_tests.py +126 -0
- autocoder/common/search_replace_patch/tests/test_base.py +188 -0
- autocoder/common/search_replace_patch/tests/test_empty_line_insert.py +233 -0
- autocoder/common/search_replace_patch/tests/test_integration.py +389 -0
- autocoder/common/search_replace_patch/tests/test_manager.py +351 -0
- autocoder/common/search_replace_patch/tests/test_patch_replacer.py +316 -0
- autocoder/common/search_replace_patch/tests/test_regex_replacer.py +306 -0
- autocoder/common/search_replace_patch/tests/test_similarity_replacer.py +384 -0
- autocoder/common/shell_commands/__init__.py +197 -0
- autocoder/common/shell_commands/background_process_notifier.py +346 -0
- autocoder/common/shell_commands/command_executor.py +1127 -0
- autocoder/common/shell_commands/error_recovery.py +541 -0
- autocoder/common/shell_commands/exceptions.py +120 -0
- autocoder/common/shell_commands/interactive_executor.py +476 -0
- autocoder/common/shell_commands/interactive_pexpect_process.py +623 -0
- autocoder/common/shell_commands/interactive_process.py +744 -0
- autocoder/common/shell_commands/interactive_session_manager.py +1014 -0
- autocoder/common/shell_commands/monitoring.py +529 -0
- autocoder/common/shell_commands/process_cleanup.py +386 -0
- autocoder/common/shell_commands/process_manager.py +606 -0
- autocoder/common/shell_commands/test_interactive_pexpect_process.py +281 -0
- autocoder/common/shell_commands/tests/__init__.py +6 -0
- autocoder/common/shell_commands/tests/conftest.py +118 -0
- autocoder/common/shell_commands/tests/test_background_process_notifier.py +703 -0
- autocoder/common/shell_commands/tests/test_command_executor.py +448 -0
- autocoder/common/shell_commands/tests/test_error_recovery.py +305 -0
- autocoder/common/shell_commands/tests/test_exceptions.py +299 -0
- autocoder/common/shell_commands/tests/test_execute_batch.py +588 -0
- autocoder/common/shell_commands/tests/test_indented_batch_commands.py +244 -0
- autocoder/common/shell_commands/tests/test_integration.py +664 -0
- autocoder/common/shell_commands/tests/test_monitoring.py +546 -0
- autocoder/common/shell_commands/tests/test_performance.py +632 -0
- autocoder/common/shell_commands/tests/test_process_cleanup.py +397 -0
- autocoder/common/shell_commands/tests/test_process_manager.py +606 -0
- autocoder/common/shell_commands/tests/test_timeout_config.py +343 -0
- autocoder/common/shell_commands/tests/test_timeout_manager.py +520 -0
- autocoder/common/shell_commands/timeout_config.py +315 -0
- autocoder/common/shell_commands/timeout_manager.py +352 -0
- autocoder/common/terminal_paste/__init__.py +14 -0
- autocoder/common/terminal_paste/demo.py +145 -0
- autocoder/common/terminal_paste/demo_paste_functionality.py +95 -0
- autocoder/common/terminal_paste/paste_handler.py +200 -0
- autocoder/common/terminal_paste/paste_manager.py +118 -0
- autocoder/common/terminal_paste/tests/__init__.py +1 -0
- autocoder/common/terminal_paste/tests/test_paste_handler.py +182 -0
- autocoder/common/terminal_paste/tests/test_paste_manager.py +126 -0
- autocoder/common/terminal_paste/utils.py +163 -0
- autocoder/common/test_autocoder_args.py +232 -0
- autocoder/common/test_env_manager.py +173 -0
- autocoder/common/test_env_manager_integration.py +159 -0
- autocoder/common/text_similarity/__init__.py +9 -0
- autocoder/common/text_similarity/demo.py +216 -0
- autocoder/common/text_similarity/examples.py +266 -0
- autocoder/common/text_similarity/test_text_similarity.py +306 -0
- autocoder/common/text_similarity/text_similarity.py +194 -0
- autocoder/common/text_similarity/utils.py +125 -0
- autocoder/common/todos/__init__.py +61 -0
- autocoder/common/todos/cache/__init__.py +16 -0
- autocoder/common/todos/cache/base_cache.py +89 -0
- autocoder/common/todos/cache/cache_manager.py +228 -0
- autocoder/common/todos/cache/memory_cache.py +225 -0
- autocoder/common/todos/config.py +155 -0
- autocoder/common/todos/exceptions.py +35 -0
- autocoder/common/todos/get_todo_manager.py +161 -0
- autocoder/common/todos/manager.py +537 -0
- autocoder/common/todos/models.py +239 -0
- autocoder/common/todos/storage/__init__.py +14 -0
- autocoder/common/todos/storage/base_storage.py +76 -0
- autocoder/common/todos/storage/file_storage.py +278 -0
- autocoder/common/tokens/counter.py +24 -2
- autocoder/common/tools_manager/__init__.py +17 -0
- autocoder/common/tools_manager/examples.py +162 -0
- autocoder/common/tools_manager/manager.py +385 -0
- autocoder/common/tools_manager/models.py +39 -0
- autocoder/common/tools_manager/test_tools_manager.py +303 -0
- autocoder/common/tools_manager/utils.py +191 -0
- autocoder/common/v2/agent/agentic_callbacks.py +270 -0
- autocoder/common/v2/agent/agentic_edit.py +2699 -1856
- autocoder/common/v2/agent/agentic_edit_change_manager.py +474 -0
- autocoder/common/v2/agent/agentic_edit_tools/__init__.py +35 -1
- autocoder/common/v2/agent/agentic_edit_tools/ac_mod_list_tool_resolver.py +279 -0
- autocoder/common/v2/agent/agentic_edit_tools/ac_mod_write_tool_resolver.py +10 -1
- autocoder/common/v2/agent/agentic_edit_tools/background_task_tool_resolver.py +1167 -0
- autocoder/common/v2/agent/agentic_edit_tools/base_tool_resolver.py +2 -2
- autocoder/common/v2/agent/agentic_edit_tools/conversation_message_ids_read_tool_resolver.py +214 -0
- autocoder/common/v2/agent/agentic_edit_tools/conversation_message_ids_write_tool_resolver.py +299 -0
- autocoder/common/v2/agent/agentic_edit_tools/count_tokens_tool_resolver.py +290 -0
- autocoder/common/v2/agent/agentic_edit_tools/execute_command_tool_resolver.py +564 -29
- autocoder/common/v2/agent/agentic_edit_tools/execute_workflow_tool_resolver.py +485 -0
- autocoder/common/v2/agent/agentic_edit_tools/extract_to_text_tool_resolver.py +225 -0
- autocoder/common/v2/agent/agentic_edit_tools/lint_report.py +79 -0
- autocoder/common/v2/agent/agentic_edit_tools/linter_config_models.py +343 -0
- autocoder/common/v2/agent/agentic_edit_tools/linter_enabled_tool_resolver.py +189 -0
- autocoder/common/v2/agent/agentic_edit_tools/list_files_tool_resolver.py +169 -101
- autocoder/common/v2/agent/agentic_edit_tools/load_extra_document_tool_resolver.py +349 -0
- autocoder/common/v2/agent/agentic_edit_tools/read_file_tool_resolver.py +243 -50
- autocoder/common/v2/agent/agentic_edit_tools/replace_in_file_tool_resolver.py +667 -147
- autocoder/common/v2/agent/agentic_edit_tools/run_named_subagents_tool_resolver.py +691 -0
- autocoder/common/v2/agent/agentic_edit_tools/search_files_tool_resolver.py +410 -86
- autocoder/common/v2/agent/agentic_edit_tools/session_interactive_tool_resolver.py +115 -0
- autocoder/common/v2/agent/agentic_edit_tools/session_start_tool_resolver.py +190 -0
- autocoder/common/v2/agent/agentic_edit_tools/session_stop_tool_resolver.py +76 -0
- autocoder/common/v2/agent/agentic_edit_tools/test_write_to_file_tool_resolver.py +207 -192
- autocoder/common/v2/agent/agentic_edit_tools/todo_read_tool_resolver.py +80 -63
- autocoder/common/v2/agent/agentic_edit_tools/todo_write_tool_resolver.py +237 -233
- autocoder/common/v2/agent/agentic_edit_tools/use_mcp_tool_resolver.py +2 -2
- autocoder/common/v2/agent/agentic_edit_tools/web_crawl_tool_resolver.py +557 -0
- autocoder/common/v2/agent/agentic_edit_tools/web_search_tool_resolver.py +600 -0
- autocoder/common/v2/agent/agentic_edit_tools/write_to_file_tool_resolver.py +56 -121
- autocoder/common/v2/agent/agentic_edit_types.py +343 -9
- autocoder/common/v2/agent/runner/__init__.py +3 -3
- autocoder/common/v2/agent/runner/base_runner.py +12 -26
- autocoder/common/v2/agent/runner/{event_runner.py → file_based_event_runner.py} +3 -2
- autocoder/common/v2/agent/runner/sdk_runner.py +150 -8
- autocoder/common/v2/agent/runner/terminal_runner.py +170 -57
- autocoder/common/v2/agent/runner/tool_display.py +557 -159
- autocoder/common/v2/agent/test_agentic_callbacks.py +265 -0
- autocoder/common/v2/agent/test_agentic_edit.py +194 -0
- autocoder/common/v2/agent/tool_caller/__init__.py +24 -0
- autocoder/common/v2/agent/tool_caller/default_tool_resolver_map.py +135 -0
- autocoder/common/v2/agent/tool_caller/integration_test.py +172 -0
- autocoder/common/v2/agent/tool_caller/plugins/__init__.py +14 -0
- autocoder/common/v2/agent/tool_caller/plugins/base_plugin.py +126 -0
- autocoder/common/v2/agent/tool_caller/plugins/examples/__init__.py +13 -0
- autocoder/common/v2/agent/tool_caller/plugins/examples/logging_plugin.py +164 -0
- autocoder/common/v2/agent/tool_caller/plugins/examples/security_filter_plugin.py +198 -0
- autocoder/common/v2/agent/tool_caller/plugins/plugin_interface.py +141 -0
- autocoder/common/v2/agent/tool_caller/test_tool_caller.py +278 -0
- autocoder/common/v2/agent/tool_caller/tool_call_plugin_manager.py +331 -0
- autocoder/common/v2/agent/tool_caller/tool_caller.py +337 -0
- autocoder/common/v2/agent/tool_caller/usage_example.py +193 -0
- autocoder/common/v2/code_agentic_editblock_manager.py +4 -4
- autocoder/common/v2/code_auto_generate.py +136 -78
- autocoder/common/v2/code_auto_generate_diff.py +135 -79
- autocoder/common/v2/code_auto_generate_editblock.py +174 -99
- autocoder/common/v2/code_auto_generate_strict_diff.py +151 -71
- autocoder/common/v2/code_auto_merge.py +1 -1
- autocoder/common/v2/code_auto_merge_editblock.py +13 -1
- autocoder/common/v2/code_diff_manager.py +3 -3
- autocoder/common/v2/code_editblock_manager.py +4 -14
- autocoder/common/v2/code_manager.py +1 -1
- autocoder/common/v2/code_strict_diff_manager.py +2 -2
- autocoder/common/wrap_llm_hint/__init__.py +10 -0
- autocoder/common/wrap_llm_hint/test_wrap_llm_hint.py +1067 -0
- autocoder/common/wrap_llm_hint/utils.py +432 -0
- autocoder/common/wrap_llm_hint/wrap_llm_hint.py +323 -0
- autocoder/completer/__init__.py +8 -0
- autocoder/completer/command_completer_v2.py +1051 -0
- autocoder/default_project/__init__.py +501 -0
- autocoder/dispacher/__init__.py +4 -12
- autocoder/dispacher/actions/action.py +165 -7
- autocoder/dispacher/actions/plugins/action_regex_project.py +2 -2
- autocoder/index/entry.py +116 -124
- autocoder/{agent → index/filter}/agentic_filter.py +322 -333
- autocoder/index/filter/normal_filter.py +5 -11
- autocoder/index/filter/quick_filter.py +1 -1
- autocoder/index/index.py +36 -9
- autocoder/index/tests/__init__.py +1 -0
- autocoder/index/tests/run_tests.py +195 -0
- autocoder/index/tests/test_entry.py +303 -0
- autocoder/index/tests/test_index_manager.py +314 -0
- autocoder/index/tests/test_module_integration.py +300 -0
- autocoder/index/tests/test_symbols_utils.py +183 -0
- autocoder/inner/__init__.py +4 -0
- autocoder/inner/agentic.py +932 -0
- autocoder/inner/async_command_handler.py +992 -0
- autocoder/inner/conversation_command_handlers.py +623 -0
- autocoder/inner/merge_command_handler.py +213 -0
- autocoder/inner/queue_command_handler.py +684 -0
- autocoder/models.py +95 -266
- autocoder/plugins/git_helper_plugin.py +31 -29
- autocoder/plugins/token_helper_plugin.py +65 -46
- autocoder/pyproject/__init__.py +32 -29
- autocoder/rag/agentic_rag.py +215 -75
- autocoder/rag/cache/simple_cache.py +1 -2
- autocoder/rag/loaders/image_loader.py +1 -1
- autocoder/rag/long_context_rag.py +42 -26
- autocoder/rag/qa_conversation_strategy.py +1 -1
- autocoder/rag/terminal/__init__.py +17 -0
- autocoder/rag/terminal/args.py +581 -0
- autocoder/rag/terminal/bootstrap.py +61 -0
- autocoder/rag/terminal/command_handlers.py +653 -0
- autocoder/rag/terminal/formatters/__init__.py +20 -0
- autocoder/rag/terminal/formatters/base.py +70 -0
- autocoder/rag/terminal/formatters/json_format.py +66 -0
- autocoder/rag/terminal/formatters/stream_json.py +95 -0
- autocoder/rag/terminal/formatters/text.py +28 -0
- autocoder/rag/terminal/init.py +120 -0
- autocoder/rag/terminal/utils.py +106 -0
- autocoder/rag/test_agentic_rag.py +389 -0
- autocoder/rag/test_doc_filter.py +3 -3
- autocoder/rag/test_long_context_rag.py +1 -1
- autocoder/rag/test_token_limiter.py +517 -10
- autocoder/rag/token_counter.py +3 -0
- autocoder/rag/token_limiter.py +19 -15
- autocoder/rag/tools/__init__.py +26 -2
- autocoder/rag/tools/bochaai_example.py +343 -0
- autocoder/rag/tools/bochaai_sdk.py +541 -0
- autocoder/rag/tools/metaso_example.py +268 -0
- autocoder/rag/tools/metaso_sdk.py +417 -0
- autocoder/rag/tools/recall_tool.py +28 -7
- autocoder/rag/tools/run_integration_tests.py +204 -0
- autocoder/rag/tools/test_all_providers.py +318 -0
- autocoder/rag/tools/test_bochaai_integration.py +482 -0
- autocoder/rag/tools/test_final_integration.py +215 -0
- autocoder/rag/tools/test_metaso_integration.py +424 -0
- autocoder/rag/tools/test_metaso_real.py +171 -0
- autocoder/rag/tools/test_web_crawl_tool.py +639 -0
- autocoder/rag/tools/test_web_search_tool.py +509 -0
- autocoder/rag/tools/todo_read_tool.py +202 -0
- autocoder/rag/tools/todo_write_tool.py +412 -0
- autocoder/rag/tools/web_crawl_tool.py +634 -0
- autocoder/rag/tools/web_search_tool.py +558 -0
- autocoder/rag/tools/web_tools_example.py +119 -0
- autocoder/rag/types.py +16 -0
- autocoder/rag/variable_holder.py +4 -2
- autocoder/rags.py +86 -79
- autocoder/regexproject/__init__.py +23 -21
- autocoder/sdk/__init__.py +46 -190
- autocoder/sdk/api.py +370 -0
- autocoder/sdk/async_runner/__init__.py +26 -0
- autocoder/sdk/async_runner/async_executor.py +650 -0
- autocoder/sdk/async_runner/async_handler.py +356 -0
- autocoder/sdk/async_runner/markdown_processor.py +595 -0
- autocoder/sdk/async_runner/task_metadata.py +284 -0
- autocoder/sdk/async_runner/worktree_manager.py +438 -0
- autocoder/sdk/cli/__init__.py +2 -5
- autocoder/sdk/cli/formatters.py +28 -204
- autocoder/sdk/cli/handlers.py +77 -44
- autocoder/sdk/cli/main.py +154 -171
- autocoder/sdk/cli/options.py +95 -22
- autocoder/sdk/constants.py +139 -51
- autocoder/sdk/core/auto_coder_core.py +484 -109
- autocoder/sdk/core/bridge.py +297 -115
- autocoder/sdk/exceptions.py +18 -12
- autocoder/sdk/formatters/__init__.py +19 -0
- autocoder/sdk/formatters/input.py +64 -0
- autocoder/sdk/formatters/output.py +247 -0
- autocoder/sdk/formatters/stream.py +54 -0
- autocoder/sdk/models/__init__.py +6 -5
- autocoder/sdk/models/options.py +55 -18
- autocoder/sdk/utils/formatters.py +27 -195
- autocoder/suffixproject/__init__.py +28 -25
- autocoder/terminal/__init__.py +14 -0
- autocoder/terminal/app.py +454 -0
- autocoder/terminal/args.py +32 -0
- autocoder/terminal/bootstrap.py +178 -0
- autocoder/terminal/command_processor.py +521 -0
- autocoder/terminal/command_registry.py +57 -0
- autocoder/terminal/help.py +97 -0
- autocoder/terminal/tasks/__init__.py +5 -0
- autocoder/terminal/tasks/background.py +77 -0
- autocoder/terminal/tasks/task_event.py +70 -0
- autocoder/terminal/ui/__init__.py +13 -0
- autocoder/terminal/ui/completer.py +268 -0
- autocoder/terminal/ui/keybindings.py +75 -0
- autocoder/terminal/ui/session.py +41 -0
- autocoder/terminal/ui/toolbar.py +64 -0
- autocoder/terminal/utils/__init__.py +13 -0
- autocoder/terminal/utils/errors.py +18 -0
- autocoder/terminal/utils/paths.py +19 -0
- autocoder/terminal/utils/shell.py +43 -0
- autocoder/terminal_v3/__init__.py +10 -0
- autocoder/terminal_v3/app.py +201 -0
- autocoder/terminal_v3/handlers/__init__.py +5 -0
- autocoder/terminal_v3/handlers/command_handler.py +131 -0
- autocoder/terminal_v3/models/__init__.py +6 -0
- autocoder/terminal_v3/models/conversation_buffer.py +214 -0
- autocoder/terminal_v3/models/message.py +50 -0
- autocoder/terminal_v3/models/tool_display.py +247 -0
- autocoder/terminal_v3/ui/__init__.py +7 -0
- autocoder/terminal_v3/ui/keybindings.py +56 -0
- autocoder/terminal_v3/ui/layout.py +141 -0
- autocoder/terminal_v3/ui/styles.py +43 -0
- autocoder/tsproject/__init__.py +23 -23
- autocoder/utils/auto_coder_utils/chat_stream_out.py +1 -1
- autocoder/utils/llms.py +88 -80
- autocoder/utils/math_utils.py +101 -0
- autocoder/utils/model_provider_selector.py +16 -4
- autocoder/utils/operate_config_api.py +33 -5
- autocoder/utils/thread_utils.py +2 -2
- autocoder/version.py +4 -2
- autocoder/workflow_agents/__init__.py +84 -0
- autocoder/workflow_agents/agent.py +143 -0
- autocoder/workflow_agents/exceptions.py +573 -0
- autocoder/workflow_agents/executor.py +489 -0
- autocoder/workflow_agents/loader.py +737 -0
- autocoder/workflow_agents/runner.py +267 -0
- autocoder/workflow_agents/types.py +172 -0
- autocoder/workflow_agents/utils.py +434 -0
- autocoder/workflow_agents/workflow_manager.py +211 -0
- auto_coder-1.0.0.dist-info/METADATA +0 -396
- auto_coder-1.0.0.dist-info/RECORD +0 -442
- auto_coder-1.0.0.dist-info/licenses/LICENSE +0 -201
- autocoder/auto_coder_server.py +0 -672
- autocoder/benchmark.py +0 -138
- autocoder/common/ac_style_command_parser/example.py +0 -7
- autocoder/common/cleaner.py +0 -31
- autocoder/common/command_completer_v2.py +0 -615
- autocoder/common/context_pruner.py +0 -477
- autocoder/common/conversation_pruner.py +0 -132
- autocoder/common/directory_cache/__init__.py +0 -1
- autocoder/common/directory_cache/cache.py +0 -192
- autocoder/common/directory_cache/test_cache.py +0 -190
- autocoder/common/file_checkpoint/examples.py +0 -217
- autocoder/common/llm_friendly_package_example.py +0 -138
- autocoder/common/llm_friendly_package_test.py +0 -63
- autocoder/common/pull_requests/test_module.py +0 -1
- autocoder/common/rulefiles/autocoderrules_utils.py +0 -484
- autocoder/common/text.py +0 -30
- autocoder/common/v2/agent/agentic_edit_tools/list_package_info_tool_resolver.py +0 -42
- autocoder/common/v2/agent/agentic_edit_tools/test_execute_command_tool_resolver.py +0 -70
- autocoder/common/v2/agent/agentic_edit_tools/test_search_files_tool_resolver.py +0 -163
- autocoder/common/v2/agent/agentic_tool_display.py +0 -183
- autocoder/plugins/dynamic_completion_example.py +0 -148
- autocoder/plugins/sample_plugin.py +0 -160
- autocoder/sdk/cli/__main__.py +0 -26
- autocoder/sdk/cli/completion_wrapper.py +0 -38
- autocoder/sdk/cli/install_completion.py +0 -301
- autocoder/sdk/models/messages.py +0 -209
- autocoder/sdk/session/__init__.py +0 -32
- autocoder/sdk/session/session.py +0 -106
- autocoder/sdk/session/session_manager.py +0 -56
- {auto_coder-1.0.0.dist-info → auto_coder-2.0.0.dist-info}/top_level.txt +0 -0
- /autocoder/{sdk/example.py → common/agent_query_queue/__init__.py} +0 -0
|
@@ -1,147 +1,370 @@
|
|
|
1
|
-
from typing import List, Dict, Any, Union
|
|
1
|
+
from typing import List, Dict, Any, Union, Optional
|
|
2
2
|
import json
|
|
3
3
|
import re
|
|
4
|
-
|
|
4
|
+
import copy
|
|
5
5
|
import byzerllm
|
|
6
6
|
from autocoder.common.printer import Printer
|
|
7
|
-
from autocoder.
|
|
7
|
+
from autocoder.common.tokens import count_string_tokens
|
|
8
8
|
from loguru import logger
|
|
9
9
|
from autocoder.common import AutoCoderArgs
|
|
10
|
+
from autocoder.common.autocoderargs_parser import AutoCoderArgsParser
|
|
10
11
|
from autocoder.common.save_formatted_log import save_formatted_log
|
|
12
|
+
from autocoder.common.wrap_llm_hint.utils import merge_with_last_user_message
|
|
13
|
+
from autocoder.common.conversations.get_conversation_manager import get_conversation_manager
|
|
14
|
+
from .tool_content_detector import ToolContentDetector
|
|
15
|
+
from .conversation_message_ids_api import get_conversation_message_ids_api
|
|
16
|
+
from .conversation_message_ids_pruner import ConversationMessageIdsPruner
|
|
11
17
|
|
|
12
|
-
class AgenticPruneStrategy(BaseModel):
|
|
13
|
-
name: str
|
|
14
|
-
description: str
|
|
15
|
-
config: Dict[str, Any] = {"safe_zone_tokens": 0}
|
|
16
18
|
|
|
17
19
|
class AgenticConversationPruner:
|
|
18
20
|
"""
|
|
19
21
|
Specialized conversation pruner for agentic conversations that cleans up tool outputs.
|
|
20
|
-
|
|
22
|
+
|
|
21
23
|
This pruner specifically targets tool result messages (role='user', content contains '<tool_result>')
|
|
22
24
|
and replaces their content with a placeholder message to reduce token usage while maintaining
|
|
23
25
|
conversation flow.
|
|
24
26
|
"""
|
|
25
|
-
|
|
26
|
-
def __init__(self, args: AutoCoderArgs, llm: Union[byzerllm.ByzerLLM, byzerllm.SimpleByzerLLM]):
|
|
27
|
+
|
|
28
|
+
def __init__(self, args: AutoCoderArgs, llm: Union[byzerllm.ByzerLLM, byzerllm.SimpleByzerLLM, None], conversation_id: Optional[str] = None):
|
|
29
|
+
if conversation_id is None:
|
|
30
|
+
raise ValueError("conversation_id is required in AgenticConversationPruner")
|
|
31
|
+
|
|
27
32
|
self.args = args
|
|
28
33
|
self.llm = llm
|
|
34
|
+
self.conversation_id = conversation_id
|
|
29
35
|
self.printer = Printer()
|
|
30
36
|
self.replacement_message = "This message has been cleared. If you still want to get this information, you can call the tool again to retrieve it."
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
37
|
+
|
|
38
|
+
# Initialize AutoCoderArgs parser for flexible parameter parsing
|
|
39
|
+
self.args_parser = AutoCoderArgsParser()
|
|
40
|
+
|
|
41
|
+
# Initialize tool content detector
|
|
42
|
+
self.tool_content_detector = ToolContentDetector(
|
|
43
|
+
replacement_message="Content cleared to save tokens"
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
# Initialize message IDs-based pruning components
|
|
47
|
+
self.message_ids_api = get_conversation_message_ids_api()
|
|
48
|
+
self.message_ids_pruner = ConversationMessageIdsPruner()
|
|
49
|
+
|
|
50
|
+
# Track pruning statistics
|
|
51
|
+
self.pruning_stats = {
|
|
52
|
+
"range_pruning_applied": False,
|
|
53
|
+
"range_pruning_success": False,
|
|
54
|
+
"original_length": 0,
|
|
55
|
+
"after_range_pruning": 0,
|
|
56
|
+
"after_tool_cleanup": 0,
|
|
57
|
+
"total_compression_ratio": 1.0
|
|
38
58
|
}
|
|
39
59
|
|
|
40
|
-
def
|
|
41
|
-
"""
|
|
42
|
-
|
|
60
|
+
def _get_current_conversation_id(self) -> str:
|
|
61
|
+
"""
|
|
62
|
+
Get the current conversation ID from the constructor parameter.
|
|
43
63
|
|
|
44
|
-
|
|
45
|
-
|
|
64
|
+
Returns:
|
|
65
|
+
Current conversation ID (guaranteed to be not None)
|
|
46
66
|
"""
|
|
47
|
-
|
|
67
|
+
return self.conversation_id
|
|
68
|
+
|
|
69
|
+
def _get_parsed_safe_zone_tokens(self) -> int:
|
|
70
|
+
"""
|
|
71
|
+
解析 conversation_prune_safe_zone_tokens 参数,支持多种格式
|
|
72
|
+
|
|
73
|
+
Returns:
|
|
74
|
+
解析后的 token 数量
|
|
75
|
+
"""
|
|
76
|
+
# 添加调试信息
|
|
77
|
+
raw_value = self.args.conversation_prune_safe_zone_tokens
|
|
78
|
+
code_model = self.args.code_model or self.args.model
|
|
79
|
+
|
|
80
|
+
result = self.args_parser.parse_conversation_prune_safe_zone_tokens(
|
|
81
|
+
raw_value,
|
|
82
|
+
code_model
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
# 防护逻辑:如果结果为 0,使用默认值
|
|
86
|
+
if result == 0:
|
|
87
|
+
default_value = 50 * 1024
|
|
88
|
+
print(f"[WARNING] conversation_prune_safe_zone_tokens 为 0,使用默认值: {default_value}")
|
|
89
|
+
return default_value
|
|
48
90
|
|
|
91
|
+
return result
|
|
92
|
+
|
|
93
|
+
def prune_conversations(self, conversations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
94
|
+
"""
|
|
95
|
+
Prune conversations by applying range-based pruning first, then cleaning up tool outputs and tool call content.
|
|
96
|
+
|
|
49
97
|
Args:
|
|
50
98
|
conversations: Original conversation list
|
|
51
|
-
|
|
52
|
-
|
|
99
|
+
|
|
53
100
|
Returns:
|
|
54
101
|
Pruned conversation list
|
|
55
|
-
"""
|
|
56
|
-
safe_zone_tokens = self.
|
|
57
|
-
|
|
58
|
-
|
|
102
|
+
"""
|
|
103
|
+
safe_zone_tokens = self._get_parsed_safe_zone_tokens()
|
|
104
|
+
# print(f"safe_zone_tokens: {safe_zone_tokens}")
|
|
105
|
+
|
|
106
|
+
# 保存原始conversations的深拷贝,用于最终对比分析
|
|
107
|
+
# original_conversations = copy.deepcopy(conversations)
|
|
108
|
+
original_length = len(conversations)
|
|
109
|
+
|
|
110
|
+
# Initialize pruning statistics
|
|
111
|
+
self.pruning_stats["original_length"] = original_length
|
|
112
|
+
|
|
113
|
+
current_tokens = count_string_tokens(
|
|
114
|
+
json.dumps(conversations, ensure_ascii=False))
|
|
115
|
+
|
|
59
116
|
if current_tokens <= safe_zone_tokens:
|
|
117
|
+
# Update stats for no pruning needed
|
|
118
|
+
self.pruning_stats.update({
|
|
119
|
+
"after_range_pruning": original_length,
|
|
120
|
+
"after_tool_cleanup": original_length,
|
|
121
|
+
"total_compression_ratio": 1.0
|
|
122
|
+
})
|
|
60
123
|
return conversations
|
|
61
124
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
125
|
+
# Step 1: Apply message ids pruning if conversation_id is provided
|
|
126
|
+
processed_conversations = self._apply_message_ids_pruning(
|
|
127
|
+
conversations)
|
|
128
|
+
logger.info(
|
|
129
|
+
f"After Message IDs pruning: {len(conversations)} -> {len(processed_conversations)} messages")
|
|
130
|
+
|
|
131
|
+
# Check if we're within safe zone after range pruning
|
|
132
|
+
current_tokens = count_string_tokens(json.dumps(
|
|
133
|
+
processed_conversations, ensure_ascii=False))
|
|
134
|
+
|
|
135
|
+
# Step 2: Apply tool cleanup if still needed
|
|
136
|
+
if current_tokens > safe_zone_tokens:
|
|
137
|
+
config = {"safe_zone_tokens": safe_zone_tokens}
|
|
138
|
+
processed_conversations = self._unified_tool_cleanup_prune(
|
|
139
|
+
processed_conversations, config)
|
|
140
|
+
|
|
141
|
+
# Update final statistics
|
|
142
|
+
final_length = len(processed_conversations)
|
|
143
|
+
self.pruning_stats["after_tool_cleanup"] = final_length
|
|
144
|
+
self.pruning_stats["total_compression_ratio"] = final_length / \
|
|
145
|
+
original_length if original_length > 0 else 1.0
|
|
146
|
+
|
|
147
|
+
# Log overall pruning results
|
|
148
|
+
logger.info(f"Complete pruning: {original_length} -> {final_length} messages "
|
|
149
|
+
f"(total compression: {self.pruning_stats['total_compression_ratio']:.2%})")
|
|
150
|
+
|
|
151
|
+
# if the processed_conversations is still too long, we should add a user message to ask the LLM to clean up the conversation
|
|
152
|
+
final_tokens = count_string_tokens(json.dumps(
|
|
153
|
+
processed_conversations, ensure_ascii=False))
|
|
154
|
+
if final_tokens > safe_zone_tokens:
|
|
155
|
+
cleanup_message = "The conversation is still too long, please use conversation_message_ids_write tool to save the message ids to be deleted."
|
|
69
156
|
|
|
70
|
-
|
|
71
|
-
|
|
157
|
+
# Use standardized hint merging from wrap_llm_hint module
|
|
158
|
+
processed_conversations = merge_with_last_user_message(
|
|
159
|
+
processed_conversations, cleanup_message)
|
|
160
|
+
# 执行裁剪前后对比分析并记录到日志
|
|
161
|
+
# self._compare_and_log_conversations(original_conversations, processed_conversations)
|
|
162
|
+
|
|
163
|
+
save_formatted_log(self.args.source_dir, json.dumps(processed_conversations, ensure_ascii=False),
|
|
164
|
+
"agentic_pruned_conversation", conversation_id=self._get_current_conversation_id())
|
|
165
|
+
|
|
166
|
+
return processed_conversations
|
|
167
|
+
|
|
168
|
+
def _apply_message_ids_pruning(self, conversations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
72
169
|
"""
|
|
73
|
-
|
|
74
|
-
|
|
170
|
+
Apply message IDs-based pruning if conversation_id is provided and message IDs configuration exists.
|
|
171
|
+
|
|
172
|
+
Args:
|
|
173
|
+
conversations: Original conversation list
|
|
174
|
+
|
|
175
|
+
Returns:
|
|
176
|
+
Conversations after message IDs pruning (or original if no message IDs config)
|
|
177
|
+
"""
|
|
178
|
+
# Check if we have conversation_id and message IDs configuration
|
|
179
|
+
conversation_id = self._get_current_conversation_id()
|
|
180
|
+
if not conversation_id:
|
|
181
|
+
logger.debug(
|
|
182
|
+
"No conversation_id provided, skipping message IDs pruning")
|
|
183
|
+
self.pruning_stats["after_range_pruning"] = len(conversations)
|
|
184
|
+
return conversations
|
|
185
|
+
|
|
186
|
+
# Get message IDs configuration for this conversation
|
|
187
|
+
conversation_message_ids = self.message_ids_api.get_conversation_message_ids(
|
|
188
|
+
conversation_id)
|
|
189
|
+
if not conversation_message_ids:
|
|
190
|
+
logger.debug(
|
|
191
|
+
f"No message IDs configuration found for conversation {conversation_id}, skipping message IDs pruning")
|
|
192
|
+
self.pruning_stats["after_range_pruning"] = len(conversations)
|
|
193
|
+
return conversations
|
|
194
|
+
|
|
195
|
+
# Apply message IDs pruning
|
|
196
|
+
logger.info(
|
|
197
|
+
f"Applying message IDs pruning for conversation {conversation_id}")
|
|
198
|
+
self.pruning_stats["range_pruning_applied"] = True
|
|
199
|
+
|
|
200
|
+
try:
|
|
201
|
+
pruning_result = self.message_ids_pruner.prune_conversations(
|
|
202
|
+
conversations, conversation_message_ids)
|
|
203
|
+
logger.info(f"Message IDs: {pruning_result}")
|
|
204
|
+
|
|
205
|
+
if pruning_result.success:
|
|
206
|
+
self.pruning_stats["range_pruning_success"] = True
|
|
207
|
+
self.pruning_stats["after_range_pruning"] = pruning_result.pruned_length
|
|
208
|
+
|
|
209
|
+
# Log message IDs pruning results
|
|
210
|
+
logger.info(f"Message IDs pruning completed: {pruning_result.original_length} -> {pruning_result.pruned_length} messages "
|
|
211
|
+
f"(message IDs compression: {pruning_result.compression_ratio:.2%})")
|
|
212
|
+
|
|
213
|
+
# Log warnings if any
|
|
214
|
+
if pruning_result.warnings:
|
|
215
|
+
for warning in pruning_result.warnings:
|
|
216
|
+
logger.warning(f"Message IDs pruning: {warning}")
|
|
217
|
+
|
|
218
|
+
return pruning_result.pruned_conversations
|
|
219
|
+
else:
|
|
220
|
+
# Message IDs pruning failed, log error and continue with original conversations
|
|
221
|
+
logger.error(
|
|
222
|
+
f"Message IDs pruning failed: {pruning_result.error_message}")
|
|
223
|
+
self.pruning_stats["after_range_pruning"] = len(conversations)
|
|
224
|
+
return conversations
|
|
225
|
+
|
|
226
|
+
except Exception as e:
|
|
227
|
+
logger.error(
|
|
228
|
+
f"Message IDs pruning failed with exception: {str(e)}")
|
|
229
|
+
self.pruning_stats["after_range_pruning"] = len(conversations)
|
|
230
|
+
return conversations
|
|
231
|
+
|
|
232
|
+
def _unified_tool_cleanup_prune(self, conversations: List[Dict[str, Any]],
|
|
233
|
+
config: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
234
|
+
"""
|
|
235
|
+
Clean up both tool output results and tool call content in a unified process.
|
|
236
|
+
|
|
75
237
|
This method:
|
|
76
|
-
1. Identifies tool result messages (role='user' with '<tool_result' in content)
|
|
77
|
-
|
|
78
|
-
|
|
238
|
+
1. Identifies both tool result messages (role='user' with '<tool_result' in content)
|
|
239
|
+
and assistant messages containing tool calls with large content
|
|
240
|
+
2. Processes all cleanable messages in order, prioritizing tool results first
|
|
241
|
+
3. Stops when token count is within safe zone OR when less than 6 unpruned messages remain
|
|
79
242
|
"""
|
|
80
|
-
safe_zone_tokens = config.get("safe_zone_tokens",
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
243
|
+
safe_zone_tokens = config.get("safe_zone_tokens", 80 * 1024)
|
|
244
|
+
# 使用深拷贝避免修改原始数据
|
|
245
|
+
processed_conversations = copy.deepcopy(conversations)
|
|
246
|
+
|
|
247
|
+
# 预先计算初始 token 数量,避免在循环中引用未定义的变量
|
|
248
|
+
current_tokens = count_string_tokens(json.dumps(
|
|
249
|
+
processed_conversations, ensure_ascii=False))
|
|
250
|
+
|
|
251
|
+
# Find all cleanable message indices with their types
|
|
252
|
+
cleanable_messages = []
|
|
253
|
+
|
|
254
|
+
# Find both tool result messages and tool call messages in one loop
|
|
85
255
|
for i, conv in enumerate(processed_conversations):
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
256
|
+
content = conv.get("content", "")
|
|
257
|
+
role = conv.get("role")
|
|
258
|
+
|
|
259
|
+
if isinstance(content, str):
|
|
260
|
+
# Check for tool result messages (user role)
|
|
261
|
+
if (role == "user" and self._is_tool_result_message(content)):
|
|
262
|
+
cleanable_messages.append(
|
|
263
|
+
{"index": i, "type": "tool_result"})
|
|
264
|
+
# Check for assistant messages with tool calls
|
|
265
|
+
elif (role == "assistant" and self.tool_content_detector.is_tool_call_content(content)):
|
|
266
|
+
cleanable_messages.append(
|
|
267
|
+
{"index": i, "type": "tool_call"})
|
|
268
|
+
|
|
269
|
+
# Sort by index to process in order, but prioritize tool_result messages
|
|
270
|
+
cleanable_messages.sort(key=lambda x: (
|
|
271
|
+
x["index"], x["type"] != "tool_result"))
|
|
272
|
+
|
|
273
|
+
logger.info(f"Found {len([m for m in cleanable_messages if m['type'] == 'tool_result'])} tool result messages "
|
|
274
|
+
f"and {len([m for m in cleanable_messages if m['type'] == 'tool_call'])} tool call messages to potentially clean")
|
|
275
|
+
|
|
276
|
+
# Track cleaned messages
|
|
277
|
+
cleaned_count = 0
|
|
278
|
+
|
|
279
|
+
# Clean messages one by one
|
|
280
|
+
for i, message_info in enumerate(cleanable_messages):
|
|
281
|
+
# 更新当前 token 数量
|
|
282
|
+
current_tokens = count_string_tokens(json.dumps(
|
|
283
|
+
processed_conversations, ensure_ascii=False))
|
|
284
|
+
|
|
285
|
+
# 检查停止条件
|
|
286
|
+
# 1. Token数已经在安全区域内
|
|
97
287
|
if current_tokens <= safe_zone_tokens:
|
|
98
|
-
logger.info(
|
|
288
|
+
logger.info(
|
|
289
|
+
f"Token count ({current_tokens}) is within safe zone ({safe_zone_tokens}), stopping cleanup")
|
|
99
290
|
break
|
|
100
|
-
|
|
101
|
-
# Extract tool name for a more specific replacement message
|
|
102
|
-
tool_name = self._extract_tool_name(processed_conversations[tool_index]["content"])
|
|
103
|
-
replacement_content = self._generate_replacement_message(tool_name)
|
|
104
|
-
|
|
105
|
-
# Replace the content
|
|
106
|
-
original_content = processed_conversations[tool_index]["content"]
|
|
107
|
-
processed_conversations[tool_index]["content"] = replacement_content
|
|
108
|
-
|
|
109
|
-
logger.info(f"Cleaned tool result at index {tool_index} (tool: {tool_name}), "
|
|
110
|
-
f"reduced from {len(original_content)} to {len(replacement_content)} characters")
|
|
111
|
-
|
|
112
|
-
final_tokens = count_tokens(json.dumps(processed_conversations, ensure_ascii=False))
|
|
113
|
-
logger.info(f"Cleanup completed. Token count: {current_tokens} -> {final_tokens}")
|
|
114
291
|
|
|
115
|
-
|
|
116
|
-
|
|
292
|
+
# 2. 剩余未裁剪的对话少于6段
|
|
293
|
+
remaining_unpruned = len(
|
|
294
|
+
cleanable_messages) - (i + 1) # i+1 因为i是从0开始的索引
|
|
295
|
+
if remaining_unpruned < 6:
|
|
296
|
+
logger.info(
|
|
297
|
+
f"Less than 6 unpruned messages remaining ({remaining_unpruned}), stopping cleanup")
|
|
298
|
+
break
|
|
299
|
+
|
|
300
|
+
msg_index = message_info["index"]
|
|
301
|
+
msg_type = message_info["type"]
|
|
302
|
+
original_content = processed_conversations[msg_index]["content"]
|
|
303
|
+
|
|
304
|
+
if msg_type == "tool_result":
|
|
305
|
+
# Handle tool result cleanup
|
|
306
|
+
tool_name = self._extract_tool_name(original_content)
|
|
307
|
+
replacement_content = self._generate_replacement_message(
|
|
308
|
+
tool_name)
|
|
309
|
+
processed_conversations[msg_index]["content"] = replacement_content
|
|
310
|
+
cleaned_count += 1
|
|
311
|
+
|
|
312
|
+
logger.info(f"Cleaned tool result at index {msg_index} (tool: {tool_name}), "
|
|
313
|
+
f"reduced from {len(original_content)} to {len(replacement_content)} characters")
|
|
314
|
+
|
|
315
|
+
elif msg_type == "tool_call":
|
|
316
|
+
# Handle tool call content cleanup
|
|
317
|
+
tool_info = self.tool_content_detector.detect_tool_call(
|
|
318
|
+
original_content)
|
|
319
|
+
|
|
320
|
+
if tool_info:
|
|
321
|
+
new_content, replaced = self.tool_content_detector.replace_tool_content(
|
|
322
|
+
original_content, max_content_length=500
|
|
323
|
+
)
|
|
324
|
+
|
|
325
|
+
if replaced:
|
|
326
|
+
processed_conversations[msg_index]["content"] = new_content
|
|
327
|
+
cleaned_count += 1
|
|
328
|
+
logger.info(f"Cleaned tool call content at index {msg_index} (tool: {tool_info['tool_name']}), "
|
|
329
|
+
f"reduced from {len(original_content)} to {len(new_content)} characters")
|
|
330
|
+
|
|
331
|
+
final_tokens = count_string_tokens(json.dumps(
|
|
332
|
+
processed_conversations, ensure_ascii=False))
|
|
333
|
+
initial_tokens = count_string_tokens(
|
|
334
|
+
json.dumps(conversations, ensure_ascii=False))
|
|
335
|
+
logger.info(
|
|
336
|
+
f"Unified tool cleanup completed. Cleaned {cleaned_count} messages. Token count: {initial_tokens} -> {final_tokens}")
|
|
337
|
+
|
|
117
338
|
return processed_conversations
|
|
118
339
|
|
|
119
340
|
def _is_tool_result_message(self, content: str) -> bool:
|
|
120
341
|
"""
|
|
121
342
|
Check if a message content contains tool result XML.
|
|
122
|
-
|
|
343
|
+
|
|
123
344
|
Args:
|
|
124
345
|
content: Message content to check
|
|
125
|
-
|
|
346
|
+
|
|
126
347
|
Returns:
|
|
127
348
|
True if content contains tool result format
|
|
128
349
|
"""
|
|
350
|
+
if content is None:
|
|
351
|
+
return False
|
|
129
352
|
return "<tool_result" in content and "tool_name=" in content
|
|
130
353
|
|
|
131
354
|
def _extract_tool_name(self, content: str) -> str:
|
|
132
355
|
"""
|
|
133
356
|
Extract tool name from tool result XML content.
|
|
134
|
-
|
|
357
|
+
|
|
135
358
|
Args:
|
|
136
359
|
content: Tool result XML content
|
|
137
|
-
|
|
360
|
+
|
|
138
361
|
Returns:
|
|
139
362
|
Tool name or 'unknown' if not found
|
|
140
363
|
"""
|
|
141
364
|
# Pattern to match: <tool_result tool_name='...' or <tool_result tool_name="..."
|
|
142
|
-
pattern = r"<tool_result[^>]*tool_name=['\"]([^'\"]
|
|
365
|
+
pattern = r"<tool_result[^>]*tool_name=['\"]([^'\"]*)['\"]"
|
|
143
366
|
match = re.search(pattern, content)
|
|
144
|
-
|
|
367
|
+
|
|
145
368
|
if match:
|
|
146
369
|
return match.group(1)
|
|
147
370
|
return "unknown"
|
|
@@ -149,49 +372,375 @@ class AgenticConversationPruner:
|
|
|
149
372
|
def _generate_replacement_message(self, tool_name: str) -> str:
|
|
150
373
|
"""
|
|
151
374
|
Generate a replacement message for a cleaned tool result.
|
|
152
|
-
|
|
375
|
+
|
|
153
376
|
Args:
|
|
154
377
|
tool_name: Name of the tool that was called
|
|
155
|
-
|
|
378
|
+
|
|
156
379
|
Returns:
|
|
157
380
|
Replacement message string
|
|
158
381
|
"""
|
|
159
382
|
if tool_name and tool_name != "unknown":
|
|
160
383
|
return (f"<tool_result tool_name='{tool_name}' success='true'>"
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
384
|
+
f"<message>Content cleared to save tokens</message>"
|
|
385
|
+
f"<content>{self.replacement_message}</content>"
|
|
386
|
+
f"</tool_result>")
|
|
164
387
|
else:
|
|
165
388
|
return f"<tool_result success='true'><message>[Content cleared to save tokens, you can call the tool again to get the tool result.]</message><content>{self.replacement_message}</content></tool_result>"
|
|
166
389
|
|
|
167
|
-
def get_cleanup_statistics(self, original_conversations: List[Dict[str, Any]],
|
|
168
|
-
|
|
390
|
+
def get_cleanup_statistics(self, original_conversations: List[Dict[str, Any]],
|
|
391
|
+
pruned_conversations: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
169
392
|
"""
|
|
170
393
|
Get statistics about the cleanup process.
|
|
171
|
-
|
|
394
|
+
|
|
172
395
|
Args:
|
|
173
396
|
original_conversations: Original conversation list
|
|
174
397
|
pruned_conversations: Pruned conversation list
|
|
175
|
-
|
|
398
|
+
|
|
176
399
|
Returns:
|
|
177
400
|
Dictionary with cleanup statistics
|
|
178
401
|
"""
|
|
179
|
-
original_tokens =
|
|
180
|
-
|
|
181
|
-
|
|
402
|
+
original_tokens = count_string_tokens(
|
|
403
|
+
json.dumps(original_conversations, ensure_ascii=False))
|
|
404
|
+
pruned_tokens = count_string_tokens(
|
|
405
|
+
json.dumps(pruned_conversations, ensure_ascii=False))
|
|
406
|
+
|
|
182
407
|
# Count cleaned tool results
|
|
183
|
-
|
|
408
|
+
tool_results_cleaned = 0
|
|
409
|
+
tool_calls_cleaned = 0
|
|
410
|
+
|
|
184
411
|
for orig, pruned in zip(original_conversations, pruned_conversations):
|
|
185
|
-
if
|
|
186
|
-
|
|
187
|
-
orig.get("
|
|
188
|
-
|
|
189
|
-
|
|
412
|
+
if orig.get("content") != pruned.get("content"):
|
|
413
|
+
# Check if it's a tool result message (user role)
|
|
414
|
+
if (orig.get("role") == "user" and
|
|
415
|
+
self._is_tool_result_message(orig.get("content", ""))):
|
|
416
|
+
tool_results_cleaned += 1
|
|
417
|
+
|
|
418
|
+
# Check if it's a tool call message (assistant role)
|
|
419
|
+
elif (orig.get("role") == "assistant" and
|
|
420
|
+
self.tool_content_detector.is_tool_call_content(orig.get("content", ""))):
|
|
421
|
+
tool_calls_cleaned += 1
|
|
422
|
+
|
|
190
423
|
return {
|
|
191
424
|
"original_tokens": original_tokens,
|
|
192
425
|
"pruned_tokens": pruned_tokens,
|
|
193
426
|
"tokens_saved": original_tokens - pruned_tokens,
|
|
194
427
|
"compression_ratio": pruned_tokens / original_tokens if original_tokens > 0 else 1.0,
|
|
195
|
-
"tool_results_cleaned":
|
|
428
|
+
"tool_results_cleaned": tool_results_cleaned,
|
|
429
|
+
"tool_calls_cleaned": tool_calls_cleaned,
|
|
196
430
|
"total_messages": len(original_conversations)
|
|
197
|
-
}
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
def get_pruning_statistics(self) -> Dict[str, Any]:
|
|
434
|
+
"""
|
|
435
|
+
Get comprehensive pruning statistics including both range and tool cleanup.
|
|
436
|
+
|
|
437
|
+
Returns:
|
|
438
|
+
Dictionary with complete pruning statistics
|
|
439
|
+
"""
|
|
440
|
+
return {
|
|
441
|
+
"range_pruning": {
|
|
442
|
+
"applied": self.pruning_stats["range_pruning_applied"],
|
|
443
|
+
"success": self.pruning_stats["range_pruning_success"],
|
|
444
|
+
"conversation_id": self._get_current_conversation_id()
|
|
445
|
+
},
|
|
446
|
+
"message_counts": {
|
|
447
|
+
"original": self.pruning_stats["original_length"],
|
|
448
|
+
"after_range_pruning": self.pruning_stats["after_range_pruning"],
|
|
449
|
+
"after_tool_cleanup": self.pruning_stats["after_tool_cleanup"]
|
|
450
|
+
},
|
|
451
|
+
"compression": {
|
|
452
|
+
"range_pruning_ratio": (
|
|
453
|
+
self.pruning_stats["after_range_pruning"] /
|
|
454
|
+
self.pruning_stats["original_length"]
|
|
455
|
+
if self.pruning_stats["original_length"] > 0 else 1.0
|
|
456
|
+
),
|
|
457
|
+
"tool_cleanup_ratio": (
|
|
458
|
+
self.pruning_stats["after_tool_cleanup"] /
|
|
459
|
+
self.pruning_stats["after_range_pruning"]
|
|
460
|
+
if self.pruning_stats["after_range_pruning"] > 0 else 1.0
|
|
461
|
+
),
|
|
462
|
+
"total_compression_ratio": self.pruning_stats["total_compression_ratio"]
|
|
463
|
+
},
|
|
464
|
+
"messages_removed": {
|
|
465
|
+
"by_range_pruning": (
|
|
466
|
+
self.pruning_stats["original_length"] -
|
|
467
|
+
self.pruning_stats["after_range_pruning"]
|
|
468
|
+
),
|
|
469
|
+
"by_tool_cleanup": (
|
|
470
|
+
self.pruning_stats["after_range_pruning"] -
|
|
471
|
+
self.pruning_stats["after_tool_cleanup"]
|
|
472
|
+
),
|
|
473
|
+
"total_removed": (
|
|
474
|
+
self.pruning_stats["original_length"] -
|
|
475
|
+
self.pruning_stats["after_tool_cleanup"]
|
|
476
|
+
)
|
|
477
|
+
}
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
def _compare_and_log_conversations(self, original_conversations: List[Dict[str, Any]],
|
|
481
|
+
pruned_conversations: List[Dict[str, Any]]) -> None:
|
|
482
|
+
"""
|
|
483
|
+
独立的方法:对比裁剪前后的conversations,生成详细的对比报告并记录到日志中。
|
|
484
|
+
|
|
485
|
+
Args:
|
|
486
|
+
original_conversations: 裁剪前的对话列表
|
|
487
|
+
pruned_conversations: 裁剪后的对话列表
|
|
488
|
+
"""
|
|
489
|
+
try:
|
|
490
|
+
# 生成对比分析报告
|
|
491
|
+
comparison_report = self._generate_comparison_report(
|
|
492
|
+
original_conversations, pruned_conversations)
|
|
493
|
+
|
|
494
|
+
# 记录详细的对比日志
|
|
495
|
+
logger.info("=== 对话裁剪前后对比分析 ===")
|
|
496
|
+
logger.info(
|
|
497
|
+
f"原始对话数量: {comparison_report['message_counts']['original']}")
|
|
498
|
+
logger.info(
|
|
499
|
+
f"裁剪后对话数量: {comparison_report['message_counts']['final']}")
|
|
500
|
+
logger.info(
|
|
501
|
+
f"删除的对话数量: {comparison_report['message_counts']['removed']}")
|
|
502
|
+
logger.info(
|
|
503
|
+
f"消息压缩比: {comparison_report['compression']['message_compression_ratio']:.2%}")
|
|
504
|
+
logger.info(
|
|
505
|
+
f"Token压缩比: {comparison_report['compression']['token_compression_ratio']:.2%}")
|
|
506
|
+
logger.info(f"Token节省数量: {comparison_report['tokens']['saved']:,}")
|
|
507
|
+
|
|
508
|
+
if comparison_report['changes']['tool_results_modified'] > 0:
|
|
509
|
+
logger.info(
|
|
510
|
+
f"工具结果消息清理数量: {comparison_report['changes']['tool_results_modified']}")
|
|
511
|
+
|
|
512
|
+
if comparison_report['changes']['tool_calls_modified'] > 0:
|
|
513
|
+
logger.info(
|
|
514
|
+
f"工具调用内容清理数量: {comparison_report['changes']['tool_calls_modified']}")
|
|
515
|
+
|
|
516
|
+
if comparison_report['changes']['messages_removed_by_ids'] > 0:
|
|
517
|
+
logger.info(
|
|
518
|
+
f"基于消息ID删除的消息数量: {comparison_report['changes']['messages_removed_by_ids']}")
|
|
519
|
+
|
|
520
|
+
# 保存详细的对比报告到文件日志
|
|
521
|
+
save_formatted_log(
|
|
522
|
+
self.args.source_dir,
|
|
523
|
+
json.dumps(comparison_report, ensure_ascii=False, indent=2),
|
|
524
|
+
"conversation_comparison_report",
|
|
525
|
+
conversation_id=self._get_current_conversation_id()
|
|
526
|
+
)
|
|
527
|
+
|
|
528
|
+
except Exception as e:
|
|
529
|
+
logger.error(f"生成对话对比报告时出错: {str(e)}")
|
|
530
|
+
logger.exception(e)
|
|
531
|
+
|
|
532
|
+
def _generate_comparison_report(self, original_conversations: List[Dict[str, Any]],
|
|
533
|
+
pruned_conversations: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
534
|
+
"""
|
|
535
|
+
生成详细的对比分析报告。
|
|
536
|
+
|
|
537
|
+
Args:
|
|
538
|
+
original_conversations: 裁剪前的对话列表
|
|
539
|
+
pruned_conversations: 裁剪后的对话列表
|
|
540
|
+
|
|
541
|
+
Returns:
|
|
542
|
+
包含详细对比信息的字典
|
|
543
|
+
"""
|
|
544
|
+
# 基础统计信息
|
|
545
|
+
original_count = len(original_conversations)
|
|
546
|
+
pruned_count = len(pruned_conversations)
|
|
547
|
+
removed_count = original_count - pruned_count
|
|
548
|
+
|
|
549
|
+
# Token统计
|
|
550
|
+
original_tokens = count_string_tokens(
|
|
551
|
+
json.dumps(original_conversations, ensure_ascii=False))
|
|
552
|
+
pruned_tokens = count_string_tokens(
|
|
553
|
+
json.dumps(pruned_conversations, ensure_ascii=False))
|
|
554
|
+
tokens_saved = original_tokens - pruned_tokens
|
|
555
|
+
|
|
556
|
+
# 分析变化详情
|
|
557
|
+
changes_analysis = self._analyze_conversation_changes(
|
|
558
|
+
original_conversations, pruned_conversations)
|
|
559
|
+
|
|
560
|
+
# 分析消息类型分布
|
|
561
|
+
original_distribution = self._analyze_message_distribution(
|
|
562
|
+
original_conversations)
|
|
563
|
+
pruned_distribution = self._analyze_message_distribution(
|
|
564
|
+
pruned_conversations)
|
|
565
|
+
|
|
566
|
+
# 生成完整的对比报告
|
|
567
|
+
report = {
|
|
568
|
+
"timestamp": str(__import__("datetime").datetime.now()),
|
|
569
|
+
"conversation_id": self._get_current_conversation_id(),
|
|
570
|
+
"pruning_strategy": {
|
|
571
|
+
"range_pruning_applied": self.pruning_stats["range_pruning_applied"],
|
|
572
|
+
"tool_cleanup_applied": True,
|
|
573
|
+
"safe_zone_tokens": self._get_parsed_safe_zone_tokens()
|
|
574
|
+
},
|
|
575
|
+
"message_counts": {
|
|
576
|
+
"original": original_count,
|
|
577
|
+
"final": pruned_count,
|
|
578
|
+
"removed": removed_count,
|
|
579
|
+
"after_range_pruning": self.pruning_stats.get("after_range_pruning", original_count)
|
|
580
|
+
},
|
|
581
|
+
"tokens": {
|
|
582
|
+
"original": original_tokens,
|
|
583
|
+
"final": pruned_tokens,
|
|
584
|
+
"saved": tokens_saved,
|
|
585
|
+
"safe_zone_limit": self._get_parsed_safe_zone_tokens()
|
|
586
|
+
},
|
|
587
|
+
"compression": {
|
|
588
|
+
"message_compression_ratio": pruned_count / original_count if original_count > 0 else 1.0,
|
|
589
|
+
"token_compression_ratio": pruned_tokens / original_tokens if original_tokens > 0 else 1.0,
|
|
590
|
+
"range_pruning_compression": (
|
|
591
|
+
self.pruning_stats.get(
|
|
592
|
+
"after_range_pruning", original_count) / original_count
|
|
593
|
+
if original_count > 0 else 1.0
|
|
594
|
+
),
|
|
595
|
+
"tool_cleanup_compression": (
|
|
596
|
+
pruned_count /
|
|
597
|
+
self.pruning_stats.get(
|
|
598
|
+
"after_range_pruning", original_count)
|
|
599
|
+
if self.pruning_stats.get("after_range_pruning", original_count) > 0 else 1.0
|
|
600
|
+
)
|
|
601
|
+
},
|
|
602
|
+
"changes": {
|
|
603
|
+
"messages_removed_by_ids": (
|
|
604
|
+
original_count -
|
|
605
|
+
self.pruning_stats.get(
|
|
606
|
+
"after_range_pruning", original_count)
|
|
607
|
+
),
|
|
608
|
+
"tool_results_modified": changes_analysis["tool_results_modified"],
|
|
609
|
+
"tool_calls_modified": changes_analysis["tool_calls_modified"],
|
|
610
|
+
"content_modifications": changes_analysis["content_modifications"],
|
|
611
|
+
"unchanged_messages": changes_analysis["unchanged_messages"]
|
|
612
|
+
},
|
|
613
|
+
"message_distribution": {
|
|
614
|
+
"original": original_distribution,
|
|
615
|
+
"pruned": pruned_distribution
|
|
616
|
+
},
|
|
617
|
+
"detailed_changes": changes_analysis["detailed_changes"],
|
|
618
|
+
"pruning_effectiveness": {
|
|
619
|
+
"tokens_per_message_before": original_tokens / original_count if original_count > 0 else 0,
|
|
620
|
+
"tokens_per_message_after": pruned_tokens / pruned_count if pruned_count > 0 else 0,
|
|
621
|
+
"average_token_reduction_per_message": tokens_saved / original_count if original_count > 0 else 0,
|
|
622
|
+
"within_safe_zone": pruned_tokens <= self._get_parsed_safe_zone_tokens()
|
|
623
|
+
}
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
return report
|
|
627
|
+
|
|
628
|
+
def _analyze_conversation_changes(self, original_conversations: List[Dict[str, Any]],
|
|
629
|
+
pruned_conversations: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
630
|
+
"""
|
|
631
|
+
分析对话变化的详细信息。
|
|
632
|
+
|
|
633
|
+
Args:
|
|
634
|
+
original_conversations: 原始对话列表
|
|
635
|
+
pruned_conversations: 裁剪后对话列表
|
|
636
|
+
|
|
637
|
+
Returns:
|
|
638
|
+
包含变化分析的字典
|
|
639
|
+
"""
|
|
640
|
+
tool_results_modified = 0
|
|
641
|
+
tool_calls_modified = 0
|
|
642
|
+
content_modifications = 0
|
|
643
|
+
unchanged_messages = 0
|
|
644
|
+
detailed_changes = []
|
|
645
|
+
|
|
646
|
+
# 创建一个映射来匹配原始和裁剪后的消息
|
|
647
|
+
min_length = min(len(original_conversations),
|
|
648
|
+
len(pruned_conversations))
|
|
649
|
+
|
|
650
|
+
for i in range(min_length):
|
|
651
|
+
original_msg = original_conversations[i]
|
|
652
|
+
pruned_msg = pruned_conversations[i]
|
|
653
|
+
|
|
654
|
+
original_content = original_msg.get("content", "")
|
|
655
|
+
pruned_content = pruned_msg.get("content", "")
|
|
656
|
+
|
|
657
|
+
if original_content != pruned_content:
|
|
658
|
+
content_modifications += 1
|
|
659
|
+
|
|
660
|
+
# 分析修改类型
|
|
661
|
+
change_type = "content_modified"
|
|
662
|
+
tool_name = None
|
|
663
|
+
|
|
664
|
+
if (original_msg.get("role") == "user" and
|
|
665
|
+
self._is_tool_result_message(original_content)):
|
|
666
|
+
tool_results_modified += 1
|
|
667
|
+
change_type = "tool_result_cleaned"
|
|
668
|
+
tool_name = self._extract_tool_name(original_content)
|
|
669
|
+
|
|
670
|
+
elif (original_msg.get("role") == "assistant" and
|
|
671
|
+
self.tool_content_detector.is_tool_call_content(original_content)):
|
|
672
|
+
tool_calls_modified += 1
|
|
673
|
+
change_type = "tool_call_cleaned"
|
|
674
|
+
tool_info = self.tool_content_detector.detect_tool_call(
|
|
675
|
+
original_content)
|
|
676
|
+
tool_name = tool_info.get(
|
|
677
|
+
"tool_name") if tool_info else "unknown"
|
|
678
|
+
|
|
679
|
+
detailed_changes.append({
|
|
680
|
+
"message_index": i,
|
|
681
|
+
"role": original_msg.get("role"),
|
|
682
|
+
"change_type": change_type,
|
|
683
|
+
"tool_name": tool_name,
|
|
684
|
+
"original_length": len(original_content),
|
|
685
|
+
"pruned_length": len(pruned_content),
|
|
686
|
+
"size_reduction": len(original_content) - len(pruned_content)
|
|
687
|
+
})
|
|
688
|
+
else:
|
|
689
|
+
unchanged_messages += 1
|
|
690
|
+
|
|
691
|
+
return {
|
|
692
|
+
"tool_results_modified": tool_results_modified,
|
|
693
|
+
"tool_calls_modified": tool_calls_modified,
|
|
694
|
+
"content_modifications": content_modifications,
|
|
695
|
+
"unchanged_messages": unchanged_messages,
|
|
696
|
+
"detailed_changes": detailed_changes
|
|
697
|
+
}
|
|
698
|
+
|
|
699
|
+
def _analyze_message_distribution(self, conversations: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
700
|
+
"""
|
|
701
|
+
分析消息的角色分布和类型分布。
|
|
702
|
+
|
|
703
|
+
Args:
|
|
704
|
+
conversations: 对话列表
|
|
705
|
+
|
|
706
|
+
Returns:
|
|
707
|
+
包含分布信息的字典
|
|
708
|
+
"""
|
|
709
|
+
role_counts = {"user": 0, "assistant": 0, "system": 0, "other": 0}
|
|
710
|
+
message_types = {
|
|
711
|
+
"tool_result": 0,
|
|
712
|
+
"tool_call": 0,
|
|
713
|
+
"regular_user": 0,
|
|
714
|
+
"regular_assistant": 0,
|
|
715
|
+
"system": 0
|
|
716
|
+
}
|
|
717
|
+
|
|
718
|
+
for msg in conversations:
|
|
719
|
+
role = msg.get("role", "other")
|
|
720
|
+
content = msg.get("content", "")
|
|
721
|
+
|
|
722
|
+
# 统计角色分布
|
|
723
|
+
if role in role_counts:
|
|
724
|
+
role_counts[role] += 1
|
|
725
|
+
else:
|
|
726
|
+
role_counts["other"] += 1
|
|
727
|
+
|
|
728
|
+
# 统计消息类型分布
|
|
729
|
+
if role == "system":
|
|
730
|
+
message_types["system"] += 1
|
|
731
|
+
elif role == "user":
|
|
732
|
+
if self._is_tool_result_message(content):
|
|
733
|
+
message_types["tool_result"] += 1
|
|
734
|
+
else:
|
|
735
|
+
message_types["regular_user"] += 1
|
|
736
|
+
elif role == "assistant":
|
|
737
|
+
if self.tool_content_detector.is_tool_call_content(content):
|
|
738
|
+
message_types["tool_call"] += 1
|
|
739
|
+
else:
|
|
740
|
+
message_types["regular_assistant"] += 1
|
|
741
|
+
|
|
742
|
+
return {
|
|
743
|
+
"total_messages": len(conversations),
|
|
744
|
+
"role_distribution": role_counts,
|
|
745
|
+
"message_type_distribution": message_types
|
|
746
|
+
}
|