auto-coder 0.1.400__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic. Click here for more details.
- auto_coder-2.0.0.dist-info/LICENSE +158 -0
- auto_coder-2.0.0.dist-info/METADATA +558 -0
- auto_coder-2.0.0.dist-info/RECORD +795 -0
- {auto_coder-0.1.400.dist-info → auto_coder-2.0.0.dist-info}/WHEEL +1 -1
- {auto_coder-0.1.400.dist-info → auto_coder-2.0.0.dist-info}/entry_points.txt +3 -3
- autocoder/__init__.py +31 -0
- autocoder/agent/auto_filegroup.py +32 -13
- autocoder/agent/auto_learn_from_commit.py +9 -1
- autocoder/agent/base_agentic/__init__.py +3 -0
- autocoder/agent/base_agentic/agent_hub.py +1 -1
- autocoder/agent/base_agentic/base_agent.py +235 -136
- autocoder/agent/base_agentic/default_tools.py +119 -118
- autocoder/agent/base_agentic/test_base_agent.py +1 -1
- autocoder/agent/base_agentic/tool_registry.py +32 -20
- autocoder/agent/base_agentic/tools/read_file_tool_resolver.py +25 -4
- autocoder/agent/base_agentic/tools/write_to_file_tool_resolver.py +24 -11
- autocoder/agent/base_agentic/types.py +42 -0
- autocoder/agent/entry_command_agent/chat.py +73 -59
- autocoder/auto_coder.py +31 -40
- autocoder/auto_coder_rag.py +11 -1084
- autocoder/auto_coder_runner.py +1029 -2310
- autocoder/auto_coder_terminal.py +26 -0
- autocoder/auto_coder_terminal_v3.py +190 -0
- autocoder/chat/conf_command.py +224 -124
- autocoder/chat/models_command.py +361 -299
- autocoder/chat/rules_command.py +79 -31
- autocoder/chat_auto_coder.py +1021 -372
- autocoder/chat_auto_coder_lang.py +23 -732
- autocoder/commands/auto_command.py +26 -9
- autocoder/commands/auto_web.py +1 -1
- autocoder/commands/tools.py +44 -44
- autocoder/common/__init__.py +150 -128
- autocoder/common/ac_style_command_parser/__init__.py +39 -2
- autocoder/common/ac_style_command_parser/config.py +422 -0
- autocoder/common/ac_style_command_parser/parser.py +292 -78
- autocoder/common/ac_style_command_parser/test_parser.py +241 -16
- autocoder/common/ac_style_command_parser/test_typed_parser.py +342 -0
- autocoder/common/ac_style_command_parser/typed_parser.py +653 -0
- autocoder/common/action_yml_file_manager.py +25 -13
- autocoder/common/agent_events/__init__.py +52 -0
- autocoder/common/agent_events/agent_event_emitter.py +193 -0
- autocoder/common/agent_events/event_factory.py +177 -0
- autocoder/common/agent_events/examples.py +307 -0
- autocoder/common/agent_events/types.py +113 -0
- autocoder/common/agent_events/utils.py +68 -0
- autocoder/common/agent_hooks/__init__.py +44 -0
- autocoder/common/agent_hooks/examples.py +582 -0
- autocoder/common/agent_hooks/hook_executor.py +217 -0
- autocoder/common/agent_hooks/hook_manager.py +288 -0
- autocoder/common/agent_hooks/types.py +133 -0
- autocoder/common/agent_hooks/utils.py +99 -0
- autocoder/common/agent_query_queue/queue_executor.py +324 -0
- autocoder/common/agent_query_queue/queue_manager.py +325 -0
- autocoder/common/agents/__init__.py +11 -0
- autocoder/common/agents/agent_manager.py +323 -0
- autocoder/common/agents/agent_parser.py +189 -0
- autocoder/common/agents/example_usage.py +344 -0
- autocoder/common/agents/integration_example.py +330 -0
- autocoder/common/agents/test_agent_parser.py +545 -0
- autocoder/common/async_utils.py +101 -0
- autocoder/common/auto_coder_lang.py +23 -972
- autocoder/common/autocoderargs_parser/__init__.py +14 -0
- autocoder/common/autocoderargs_parser/parser.py +184 -0
- autocoder/common/autocoderargs_parser/tests/__init__.py +1 -0
- autocoder/common/autocoderargs_parser/tests/test_args_parser.py +235 -0
- autocoder/common/autocoderargs_parser/tests/test_token_parser.py +195 -0
- autocoder/common/autocoderargs_parser/token_parser.py +290 -0
- autocoder/common/buildin_tokenizer.py +2 -4
- autocoder/common/code_auto_generate.py +149 -74
- autocoder/common/code_auto_generate_diff.py +163 -70
- autocoder/common/code_auto_generate_editblock.py +179 -89
- autocoder/common/code_auto_generate_strict_diff.py +167 -72
- autocoder/common/code_auto_merge_editblock.py +13 -6
- autocoder/common/code_modification_ranker.py +1 -1
- autocoder/common/command_completer.py +3 -3
- autocoder/common/command_file_manager/manager.py +183 -47
- autocoder/common/command_file_manager/test_command_file_manager.py +507 -0
- autocoder/common/command_templates.py +1 -1
- autocoder/common/conf_utils.py +2 -4
- autocoder/common/conversations/config.py +11 -3
- autocoder/common/conversations/get_conversation_manager.py +100 -2
- autocoder/common/conversations/llm_stats_models.py +264 -0
- autocoder/common/conversations/manager.py +112 -28
- autocoder/common/conversations/models.py +16 -2
- autocoder/common/conversations/storage/index_manager.py +134 -10
- autocoder/common/core_config/__init__.py +63 -0
- autocoder/common/core_config/agentic_mode_manager.py +109 -0
- autocoder/common/core_config/base_manager.py +123 -0
- autocoder/common/core_config/compatibility.py +151 -0
- autocoder/common/core_config/config_manager.py +156 -0
- autocoder/common/core_config/conversation_manager.py +31 -0
- autocoder/common/core_config/exclude_manager.py +72 -0
- autocoder/common/core_config/file_manager.py +177 -0
- autocoder/common/core_config/human_as_model_manager.py +129 -0
- autocoder/common/core_config/lib_manager.py +54 -0
- autocoder/common/core_config/main_manager.py +81 -0
- autocoder/common/core_config/mode_manager.py +126 -0
- autocoder/common/core_config/models.py +70 -0
- autocoder/common/core_config/test_memory_manager.py +1056 -0
- autocoder/common/env_manager.py +282 -0
- autocoder/common/env_manager_usage_example.py +211 -0
- autocoder/common/file_checkpoint/conversation_checkpoint.py +19 -19
- autocoder/common/file_checkpoint/manager.py +264 -48
- autocoder/common/file_checkpoint/test_backup.py +1 -18
- autocoder/common/file_checkpoint/test_manager.py +270 -1
- autocoder/common/file_checkpoint/test_store.py +1 -17
- autocoder/common/file_handler/__init__.py +23 -0
- autocoder/common/file_handler/active_context_handler.py +159 -0
- autocoder/common/file_handler/add_files_handler.py +409 -0
- autocoder/common/file_handler/chat_handler.py +180 -0
- autocoder/common/file_handler/coding_handler.py +401 -0
- autocoder/common/file_handler/commit_handler.py +200 -0
- autocoder/common/file_handler/lib_handler.py +156 -0
- autocoder/common/file_handler/list_files_handler.py +111 -0
- autocoder/common/file_handler/mcp_handler.py +268 -0
- autocoder/common/file_handler/models_handler.py +493 -0
- autocoder/common/file_handler/remove_files_handler.py +172 -0
- autocoder/common/file_monitor/test_file_monitor.py +307 -0
- autocoder/common/git_utils.py +51 -10
- autocoder/common/global_cancel.py +15 -6
- autocoder/common/ignorefiles/test_ignore_file_utils.py +1 -1
- autocoder/common/international/__init__.py +31 -0
- autocoder/common/international/demo_international.py +92 -0
- autocoder/common/international/message_manager.py +157 -0
- autocoder/common/international/messages/__init__.py +56 -0
- autocoder/common/international/messages/async_command_messages.py +507 -0
- autocoder/common/international/messages/auto_coder_messages.py +2208 -0
- autocoder/common/international/messages/chat_auto_coder_messages.py +1547 -0
- autocoder/common/international/messages/command_help_messages.py +986 -0
- autocoder/common/international/messages/conversation_command_messages.py +191 -0
- autocoder/common/international/messages/git_helper_plugin_messages.py +159 -0
- autocoder/common/international/messages/queue_command_messages.py +751 -0
- autocoder/common/international/messages/rules_command_messages.py +77 -0
- autocoder/common/international/messages/sdk_messages.py +1707 -0
- autocoder/common/international/messages/token_helper_plugin_messages.py +361 -0
- autocoder/common/international/messages/tool_display_messages.py +1212 -0
- autocoder/common/international/messages/workflow_exception_messages.py +473 -0
- autocoder/common/international/test_international.py +612 -0
- autocoder/common/linter_core/__init__.py +28 -0
- autocoder/common/linter_core/base_linter.py +61 -0
- autocoder/common/linter_core/config_loader.py +271 -0
- autocoder/common/linter_core/formatters/__init__.py +0 -0
- autocoder/common/linter_core/formatters/base_formatter.py +38 -0
- autocoder/common/linter_core/formatters/raw_formatter.py +17 -0
- autocoder/common/linter_core/linter.py +166 -0
- autocoder/common/linter_core/linter_factory.py +216 -0
- autocoder/common/linter_core/linter_manager.py +333 -0
- autocoder/common/linter_core/linters/__init__.py +9 -0
- autocoder/common/linter_core/linters/java_linter.py +342 -0
- autocoder/common/linter_core/linters/python_linter.py +115 -0
- autocoder/common/linter_core/linters/typescript_linter.py +119 -0
- autocoder/common/linter_core/models/__init__.py +7 -0
- autocoder/common/linter_core/models/lint_result.py +91 -0
- autocoder/common/linter_core/models.py +33 -0
- autocoder/common/linter_core/tests/__init__.py +3 -0
- autocoder/common/linter_core/tests/test_config_loader.py +323 -0
- autocoder/common/linter_core/tests/test_config_loading.py +308 -0
- autocoder/common/linter_core/tests/test_factory_manager.py +234 -0
- autocoder/common/linter_core/tests/test_formatters.py +147 -0
- autocoder/common/linter_core/tests/test_integration.py +317 -0
- autocoder/common/linter_core/tests/test_java_linter.py +496 -0
- autocoder/common/linter_core/tests/test_linters.py +265 -0
- autocoder/common/linter_core/tests/test_models.py +81 -0
- autocoder/common/linter_core/tests/verify_config_loading.py +296 -0
- autocoder/common/linter_core/tests/verify_fixes.py +183 -0
- autocoder/common/llm_friendly_package/__init__.py +31 -0
- autocoder/common/llm_friendly_package/base_manager.py +102 -0
- autocoder/common/llm_friendly_package/docs_manager.py +121 -0
- autocoder/common/llm_friendly_package/library_manager.py +171 -0
- autocoder/common/{llm_friendly_package.py → llm_friendly_package/main_manager.py} +204 -231
- autocoder/common/llm_friendly_package/models.py +40 -0
- autocoder/common/llm_friendly_package/test_llm_friendly_package.py +536 -0
- autocoder/common/llms/__init__.py +15 -0
- autocoder/common/llms/demo_error_handling.py +85 -0
- autocoder/common/llms/factory.py +142 -0
- autocoder/common/llms/manager.py +264 -0
- autocoder/common/llms/pricing.py +121 -0
- autocoder/common/llms/registry.py +288 -0
- autocoder/common/llms/schema.py +77 -0
- autocoder/common/llms/simple_demo.py +45 -0
- autocoder/common/llms/test_quick_model.py +116 -0
- autocoder/common/llms/test_remove_functionality.py +182 -0
- autocoder/common/llms/tests/__init__.py +1 -0
- autocoder/common/llms/tests/test_manager.py +330 -0
- autocoder/common/llms/tests/test_registry.py +364 -0
- autocoder/common/mcp_tools/__init__.py +62 -0
- autocoder/common/{mcp_tools.py → mcp_tools/executor.py} +49 -40
- autocoder/common/{mcp_hub.py → mcp_tools/hub.py} +42 -68
- autocoder/common/{mcp_server_install.py → mcp_tools/installer.py} +16 -28
- autocoder/common/{mcp_server.py → mcp_tools/server.py} +176 -48
- autocoder/common/mcp_tools/test_keyboard_interrupt.py +93 -0
- autocoder/common/mcp_tools/test_mcp_tools.py +391 -0
- autocoder/common/{mcp_server_types.py → mcp_tools/types.py} +121 -48
- autocoder/common/mcp_tools/verify_functionality.py +202 -0
- autocoder/common/model_speed_tester.py +32 -26
- autocoder/common/priority_directory_finder/__init__.py +142 -0
- autocoder/common/priority_directory_finder/examples.py +230 -0
- autocoder/common/priority_directory_finder/finder.py +283 -0
- autocoder/common/priority_directory_finder/models.py +236 -0
- autocoder/common/priority_directory_finder/test_priority_directory_finder.py +431 -0
- autocoder/common/project_scanner/__init__.py +18 -0
- autocoder/common/project_scanner/compat.py +77 -0
- autocoder/common/project_scanner/scanner.py +436 -0
- autocoder/common/project_tracker/__init__.py +27 -0
- autocoder/common/project_tracker/api.py +228 -0
- autocoder/common/project_tracker/demo.py +272 -0
- autocoder/common/project_tracker/tracker.py +487 -0
- autocoder/common/project_tracker/types.py +53 -0
- autocoder/common/pruner/__init__.py +67 -0
- autocoder/common/pruner/agentic_conversation_pruner.py +746 -0
- autocoder/common/{context_pruner.py → pruner/context_pruner.py} +137 -40
- autocoder/common/pruner/conversation_message_ids_api.py +386 -0
- autocoder/common/pruner/conversation_message_ids_manager.py +347 -0
- autocoder/common/pruner/conversation_message_ids_pruner.py +473 -0
- autocoder/common/pruner/conversation_normalizer.py +347 -0
- autocoder/common/{conversation_pruner.py → pruner/conversation_pruner.py} +26 -6
- autocoder/common/pruner/test_agentic_conversation_pruner.py +784 -0
- autocoder/common/pruner/test_context_pruner.py +546 -0
- autocoder/common/pruner/test_conversation_normalizer.py +502 -0
- autocoder/common/pruner/test_tool_content_detector.py +324 -0
- autocoder/common/pruner/tool_content_detector.py +227 -0
- autocoder/common/pruner/tools/__init__.py +18 -0
- autocoder/common/pruner/tools/query_message_ids.py +264 -0
- autocoder/common/pruner/tools/test_agentic_pruning_logic.py +432 -0
- autocoder/common/pruner/tools/test_message_ids_pruning_only.py +192 -0
- autocoder/common/pull_requests/__init__.py +9 -1
- autocoder/common/pull_requests/utils.py +122 -1
- autocoder/common/rag_manager/rag_manager.py +36 -40
- autocoder/common/rulefiles/__init__.py +53 -1
- autocoder/common/rulefiles/api.py +250 -0
- autocoder/common/rulefiles/core/__init__.py +14 -0
- autocoder/common/rulefiles/core/manager.py +241 -0
- autocoder/common/rulefiles/core/selector.py +805 -0
- autocoder/common/rulefiles/models/__init__.py +20 -0
- autocoder/common/rulefiles/models/index.py +16 -0
- autocoder/common/rulefiles/models/init_rule.py +18 -0
- autocoder/common/rulefiles/models/rule_file.py +18 -0
- autocoder/common/rulefiles/models/rule_relevance.py +14 -0
- autocoder/common/rulefiles/models/summary.py +16 -0
- autocoder/common/rulefiles/test_rulefiles.py +776 -0
- autocoder/common/rulefiles/utils/__init__.py +34 -0
- autocoder/common/rulefiles/utils/monitor.py +86 -0
- autocoder/common/rulefiles/utils/parser.py +230 -0
- autocoder/common/save_formatted_log.py +67 -10
- autocoder/common/search_replace.py +8 -1
- autocoder/common/search_replace_patch/__init__.py +24 -0
- autocoder/common/search_replace_patch/base.py +115 -0
- autocoder/common/search_replace_patch/manager.py +248 -0
- autocoder/common/search_replace_patch/patch_replacer.py +304 -0
- autocoder/common/search_replace_patch/similarity_replacer.py +306 -0
- autocoder/common/search_replace_patch/string_replacer.py +181 -0
- autocoder/common/search_replace_patch/tests/__init__.py +3 -0
- autocoder/common/search_replace_patch/tests/run_tests.py +126 -0
- autocoder/common/search_replace_patch/tests/test_base.py +188 -0
- autocoder/common/search_replace_patch/tests/test_empty_line_insert.py +233 -0
- autocoder/common/search_replace_patch/tests/test_integration.py +389 -0
- autocoder/common/search_replace_patch/tests/test_manager.py +351 -0
- autocoder/common/search_replace_patch/tests/test_patch_replacer.py +316 -0
- autocoder/common/search_replace_patch/tests/test_regex_replacer.py +306 -0
- autocoder/common/search_replace_patch/tests/test_similarity_replacer.py +384 -0
- autocoder/common/shell_commands/__init__.py +197 -0
- autocoder/common/shell_commands/background_process_notifier.py +346 -0
- autocoder/common/shell_commands/command_executor.py +1127 -0
- autocoder/common/shell_commands/error_recovery.py +541 -0
- autocoder/common/shell_commands/exceptions.py +120 -0
- autocoder/common/shell_commands/interactive_executor.py +476 -0
- autocoder/common/shell_commands/interactive_pexpect_process.py +623 -0
- autocoder/common/shell_commands/interactive_process.py +744 -0
- autocoder/common/shell_commands/interactive_session_manager.py +1014 -0
- autocoder/common/shell_commands/monitoring.py +529 -0
- autocoder/common/shell_commands/process_cleanup.py +386 -0
- autocoder/common/shell_commands/process_manager.py +606 -0
- autocoder/common/shell_commands/test_interactive_pexpect_process.py +281 -0
- autocoder/common/shell_commands/tests/__init__.py +6 -0
- autocoder/common/shell_commands/tests/conftest.py +118 -0
- autocoder/common/shell_commands/tests/test_background_process_notifier.py +703 -0
- autocoder/common/shell_commands/tests/test_command_executor.py +448 -0
- autocoder/common/shell_commands/tests/test_error_recovery.py +305 -0
- autocoder/common/shell_commands/tests/test_exceptions.py +299 -0
- autocoder/common/shell_commands/tests/test_execute_batch.py +588 -0
- autocoder/common/shell_commands/tests/test_indented_batch_commands.py +244 -0
- autocoder/common/shell_commands/tests/test_integration.py +664 -0
- autocoder/common/shell_commands/tests/test_monitoring.py +546 -0
- autocoder/common/shell_commands/tests/test_performance.py +632 -0
- autocoder/common/shell_commands/tests/test_process_cleanup.py +397 -0
- autocoder/common/shell_commands/tests/test_process_manager.py +606 -0
- autocoder/common/shell_commands/tests/test_timeout_config.py +343 -0
- autocoder/common/shell_commands/tests/test_timeout_manager.py +520 -0
- autocoder/common/shell_commands/timeout_config.py +315 -0
- autocoder/common/shell_commands/timeout_manager.py +352 -0
- autocoder/common/terminal_paste/__init__.py +14 -0
- autocoder/common/terminal_paste/demo.py +145 -0
- autocoder/common/terminal_paste/demo_paste_functionality.py +95 -0
- autocoder/common/terminal_paste/paste_handler.py +200 -0
- autocoder/common/terminal_paste/paste_manager.py +118 -0
- autocoder/common/terminal_paste/tests/__init__.py +1 -0
- autocoder/common/terminal_paste/tests/test_paste_handler.py +182 -0
- autocoder/common/terminal_paste/tests/test_paste_manager.py +126 -0
- autocoder/common/terminal_paste/utils.py +163 -0
- autocoder/common/test_autocoder_args.py +232 -0
- autocoder/common/test_env_manager.py +173 -0
- autocoder/common/test_env_manager_integration.py +159 -0
- autocoder/common/text_similarity/__init__.py +9 -0
- autocoder/common/text_similarity/demo.py +216 -0
- autocoder/common/text_similarity/examples.py +266 -0
- autocoder/common/text_similarity/test_text_similarity.py +306 -0
- autocoder/common/text_similarity/text_similarity.py +194 -0
- autocoder/common/text_similarity/utils.py +125 -0
- autocoder/common/todos/__init__.py +61 -0
- autocoder/common/todos/cache/__init__.py +16 -0
- autocoder/common/todos/cache/base_cache.py +89 -0
- autocoder/common/todos/cache/cache_manager.py +228 -0
- autocoder/common/todos/cache/memory_cache.py +225 -0
- autocoder/common/todos/config.py +155 -0
- autocoder/common/todos/exceptions.py +35 -0
- autocoder/common/todos/get_todo_manager.py +161 -0
- autocoder/common/todos/manager.py +537 -0
- autocoder/common/todos/models.py +239 -0
- autocoder/common/todos/storage/__init__.py +14 -0
- autocoder/common/todos/storage/base_storage.py +76 -0
- autocoder/common/todos/storage/file_storage.py +278 -0
- autocoder/common/tokens/__init__.py +15 -0
- autocoder/common/tokens/counter.py +44 -2
- autocoder/common/tools_manager/__init__.py +17 -0
- autocoder/common/tools_manager/examples.py +162 -0
- autocoder/common/tools_manager/manager.py +385 -0
- autocoder/common/tools_manager/models.py +39 -0
- autocoder/common/tools_manager/test_tools_manager.py +303 -0
- autocoder/common/tools_manager/utils.py +191 -0
- autocoder/common/v2/agent/agentic_callbacks.py +270 -0
- autocoder/common/v2/agent/agentic_edit.py +2729 -2052
- autocoder/common/v2/agent/agentic_edit_change_manager.py +474 -0
- autocoder/common/v2/agent/agentic_edit_tools/__init__.py +43 -2
- autocoder/common/v2/agent/agentic_edit_tools/ac_mod_list_tool_resolver.py +279 -0
- autocoder/common/v2/agent/agentic_edit_tools/ac_mod_read_tool_resolver.py +40 -0
- autocoder/common/v2/agent/agentic_edit_tools/ac_mod_write_tool_resolver.py +52 -0
- autocoder/common/v2/agent/agentic_edit_tools/ask_followup_question_tool_resolver.py +8 -0
- autocoder/common/v2/agent/agentic_edit_tools/background_task_tool_resolver.py +1167 -0
- autocoder/common/v2/agent/agentic_edit_tools/base_tool_resolver.py +2 -2
- autocoder/common/v2/agent/agentic_edit_tools/conversation_message_ids_read_tool_resolver.py +214 -0
- autocoder/common/v2/agent/agentic_edit_tools/conversation_message_ids_write_tool_resolver.py +299 -0
- autocoder/common/v2/agent/agentic_edit_tools/count_tokens_tool_resolver.py +290 -0
- autocoder/common/v2/agent/agentic_edit_tools/execute_command_tool_resolver.py +565 -30
- autocoder/common/v2/agent/agentic_edit_tools/execute_workflow_tool_resolver.py +485 -0
- autocoder/common/v2/agent/agentic_edit_tools/extract_to_text_tool_resolver.py +225 -0
- autocoder/common/v2/agent/agentic_edit_tools/lint_report.py +79 -0
- autocoder/common/v2/agent/agentic_edit_tools/linter_config_models.py +343 -0
- autocoder/common/v2/agent/agentic_edit_tools/linter_enabled_tool_resolver.py +189 -0
- autocoder/common/v2/agent/agentic_edit_tools/list_files_tool_resolver.py +169 -101
- autocoder/common/v2/agent/agentic_edit_tools/load_extra_document_tool_resolver.py +349 -0
- autocoder/common/v2/agent/agentic_edit_tools/read_file_tool_resolver.py +244 -51
- autocoder/common/v2/agent/agentic_edit_tools/replace_in_file_tool_resolver.py +667 -147
- autocoder/common/v2/agent/agentic_edit_tools/run_named_subagents_tool_resolver.py +691 -0
- autocoder/common/v2/agent/agentic_edit_tools/search_files_tool_resolver.py +409 -140
- autocoder/common/v2/agent/agentic_edit_tools/session_interactive_tool_resolver.py +115 -0
- autocoder/common/v2/agent/agentic_edit_tools/session_start_tool_resolver.py +190 -0
- autocoder/common/v2/agent/agentic_edit_tools/session_stop_tool_resolver.py +76 -0
- autocoder/common/v2/agent/agentic_edit_tools/test_write_to_file_tool_resolver.py +209 -194
- autocoder/common/v2/agent/agentic_edit_tools/todo_read_tool_resolver.py +135 -0
- autocoder/common/v2/agent/agentic_edit_tools/todo_write_tool_resolver.py +328 -0
- autocoder/common/v2/agent/agentic_edit_tools/use_mcp_tool_resolver.py +2 -2
- autocoder/common/v2/agent/agentic_edit_tools/web_crawl_tool_resolver.py +557 -0
- autocoder/common/v2/agent/agentic_edit_tools/web_search_tool_resolver.py +600 -0
- autocoder/common/v2/agent/agentic_edit_tools/write_to_file_tool_resolver.py +56 -121
- autocoder/common/v2/agent/agentic_edit_types.py +386 -10
- autocoder/common/v2/agent/runner/__init__.py +31 -0
- autocoder/common/v2/agent/runner/base_runner.py +92 -0
- autocoder/common/v2/agent/runner/file_based_event_runner.py +217 -0
- autocoder/common/v2/agent/runner/sdk_runner.py +182 -0
- autocoder/common/v2/agent/runner/terminal_runner.py +396 -0
- autocoder/common/v2/agent/runner/tool_display.py +589 -0
- autocoder/common/v2/agent/test_agentic_callbacks.py +265 -0
- autocoder/common/v2/agent/test_agentic_edit.py +194 -0
- autocoder/common/v2/agent/tool_caller/__init__.py +24 -0
- autocoder/common/v2/agent/tool_caller/default_tool_resolver_map.py +135 -0
- autocoder/common/v2/agent/tool_caller/integration_test.py +172 -0
- autocoder/common/v2/agent/tool_caller/plugins/__init__.py +14 -0
- autocoder/common/v2/agent/tool_caller/plugins/base_plugin.py +126 -0
- autocoder/common/v2/agent/tool_caller/plugins/examples/__init__.py +13 -0
- autocoder/common/v2/agent/tool_caller/plugins/examples/logging_plugin.py +164 -0
- autocoder/common/v2/agent/tool_caller/plugins/examples/security_filter_plugin.py +198 -0
- autocoder/common/v2/agent/tool_caller/plugins/plugin_interface.py +141 -0
- autocoder/common/v2/agent/tool_caller/test_tool_caller.py +278 -0
- autocoder/common/v2/agent/tool_caller/tool_call_plugin_manager.py +331 -0
- autocoder/common/v2/agent/tool_caller/tool_caller.py +337 -0
- autocoder/common/v2/agent/tool_caller/usage_example.py +193 -0
- autocoder/common/v2/code_agentic_editblock_manager.py +4 -4
- autocoder/common/v2/code_auto_generate.py +136 -78
- autocoder/common/v2/code_auto_generate_diff.py +135 -79
- autocoder/common/v2/code_auto_generate_editblock.py +174 -99
- autocoder/common/v2/code_auto_generate_strict_diff.py +151 -71
- autocoder/common/v2/code_auto_merge.py +1 -1
- autocoder/common/v2/code_auto_merge_editblock.py +13 -1
- autocoder/common/v2/code_diff_manager.py +3 -3
- autocoder/common/v2/code_editblock_manager.py +4 -14
- autocoder/common/v2/code_manager.py +1 -1
- autocoder/common/v2/code_strict_diff_manager.py +2 -2
- autocoder/common/wrap_llm_hint/__init__.py +10 -0
- autocoder/common/wrap_llm_hint/test_wrap_llm_hint.py +1067 -0
- autocoder/common/wrap_llm_hint/utils.py +432 -0
- autocoder/common/wrap_llm_hint/wrap_llm_hint.py +323 -0
- autocoder/completer/__init__.py +8 -0
- autocoder/completer/command_completer_v2.py +1051 -0
- autocoder/default_project/__init__.py +501 -0
- autocoder/dispacher/__init__.py +4 -12
- autocoder/dispacher/actions/action.py +165 -7
- autocoder/dispacher/actions/plugins/action_regex_project.py +2 -2
- autocoder/index/entry.py +117 -125
- autocoder/{agent → index/filter}/agentic_filter.py +323 -334
- autocoder/index/filter/normal_filter.py +5 -11
- autocoder/index/filter/quick_filter.py +1 -1
- autocoder/index/index.py +36 -9
- autocoder/index/tests/__init__.py +1 -0
- autocoder/index/tests/run_tests.py +195 -0
- autocoder/index/tests/test_entry.py +303 -0
- autocoder/index/tests/test_index_manager.py +314 -0
- autocoder/index/tests/test_module_integration.py +300 -0
- autocoder/index/tests/test_symbols_utils.py +183 -0
- autocoder/inner/__init__.py +4 -0
- autocoder/inner/agentic.py +932 -0
- autocoder/inner/async_command_handler.py +992 -0
- autocoder/inner/conversation_command_handlers.py +623 -0
- autocoder/inner/merge_command_handler.py +213 -0
- autocoder/inner/queue_command_handler.py +684 -0
- autocoder/models.py +95 -266
- autocoder/plugins/git_helper_plugin.py +31 -29
- autocoder/plugins/token_helper_plugin.py +156 -37
- autocoder/pyproject/__init__.py +32 -29
- autocoder/rag/agentic_rag.py +215 -75
- autocoder/rag/cache/simple_cache.py +1 -2
- autocoder/rag/loaders/image_loader.py +1 -1
- autocoder/rag/long_context_rag.py +42 -26
- autocoder/rag/qa_conversation_strategy.py +1 -1
- autocoder/rag/terminal/__init__.py +17 -0
- autocoder/rag/terminal/args.py +581 -0
- autocoder/rag/terminal/bootstrap.py +61 -0
- autocoder/rag/terminal/command_handlers.py +653 -0
- autocoder/rag/terminal/formatters/__init__.py +20 -0
- autocoder/rag/terminal/formatters/base.py +70 -0
- autocoder/rag/terminal/formatters/json_format.py +66 -0
- autocoder/rag/terminal/formatters/stream_json.py +95 -0
- autocoder/rag/terminal/formatters/text.py +28 -0
- autocoder/rag/terminal/init.py +120 -0
- autocoder/rag/terminal/utils.py +106 -0
- autocoder/rag/test_agentic_rag.py +389 -0
- autocoder/rag/test_doc_filter.py +3 -3
- autocoder/rag/test_long_context_rag.py +1 -1
- autocoder/rag/test_token_limiter.py +517 -10
- autocoder/rag/token_counter.py +3 -0
- autocoder/rag/token_limiter.py +19 -15
- autocoder/rag/tools/__init__.py +26 -2
- autocoder/rag/tools/bochaai_example.py +343 -0
- autocoder/rag/tools/bochaai_sdk.py +541 -0
- autocoder/rag/tools/metaso_example.py +268 -0
- autocoder/rag/tools/metaso_sdk.py +417 -0
- autocoder/rag/tools/recall_tool.py +28 -7
- autocoder/rag/tools/run_integration_tests.py +204 -0
- autocoder/rag/tools/test_all_providers.py +318 -0
- autocoder/rag/tools/test_bochaai_integration.py +482 -0
- autocoder/rag/tools/test_final_integration.py +215 -0
- autocoder/rag/tools/test_metaso_integration.py +424 -0
- autocoder/rag/tools/test_metaso_real.py +171 -0
- autocoder/rag/tools/test_web_crawl_tool.py +639 -0
- autocoder/rag/tools/test_web_search_tool.py +509 -0
- autocoder/rag/tools/todo_read_tool.py +202 -0
- autocoder/rag/tools/todo_write_tool.py +412 -0
- autocoder/rag/tools/web_crawl_tool.py +634 -0
- autocoder/rag/tools/web_search_tool.py +558 -0
- autocoder/rag/tools/web_tools_example.py +119 -0
- autocoder/rag/types.py +16 -0
- autocoder/rag/variable_holder.py +4 -2
- autocoder/rags.py +86 -79
- autocoder/regexproject/__init__.py +23 -21
- autocoder/run_context.py +9 -0
- autocoder/sdk/__init__.py +50 -161
- autocoder/sdk/api.py +370 -0
- autocoder/sdk/async_runner/__init__.py +26 -0
- autocoder/sdk/async_runner/async_executor.py +650 -0
- autocoder/sdk/async_runner/async_handler.py +356 -0
- autocoder/sdk/async_runner/markdown_processor.py +595 -0
- autocoder/sdk/async_runner/task_metadata.py +284 -0
- autocoder/sdk/async_runner/worktree_manager.py +438 -0
- autocoder/sdk/cli/__init__.py +2 -5
- autocoder/sdk/cli/formatters.py +28 -204
- autocoder/sdk/cli/handlers.py +77 -44
- autocoder/sdk/cli/main.py +158 -170
- autocoder/sdk/cli/options.py +95 -22
- autocoder/sdk/constants.py +139 -51
- autocoder/sdk/core/auto_coder_core.py +484 -267
- autocoder/sdk/core/bridge.py +298 -118
- autocoder/sdk/exceptions.py +18 -12
- autocoder/sdk/formatters/__init__.py +19 -0
- autocoder/sdk/formatters/input.py +64 -0
- autocoder/sdk/formatters/output.py +247 -0
- autocoder/sdk/formatters/stream.py +54 -0
- autocoder/sdk/models/__init__.py +6 -5
- autocoder/sdk/models/options.py +55 -18
- autocoder/sdk/utils/formatters.py +27 -195
- autocoder/suffixproject/__init__.py +28 -25
- autocoder/terminal/__init__.py +14 -0
- autocoder/terminal/app.py +454 -0
- autocoder/terminal/args.py +32 -0
- autocoder/terminal/bootstrap.py +178 -0
- autocoder/terminal/command_processor.py +521 -0
- autocoder/terminal/command_registry.py +57 -0
- autocoder/terminal/help.py +97 -0
- autocoder/terminal/tasks/__init__.py +5 -0
- autocoder/terminal/tasks/background.py +77 -0
- autocoder/terminal/tasks/task_event.py +70 -0
- autocoder/terminal/ui/__init__.py +13 -0
- autocoder/terminal/ui/completer.py +268 -0
- autocoder/terminal/ui/keybindings.py +75 -0
- autocoder/terminal/ui/session.py +41 -0
- autocoder/terminal/ui/toolbar.py +64 -0
- autocoder/terminal/utils/__init__.py +13 -0
- autocoder/terminal/utils/errors.py +18 -0
- autocoder/terminal/utils/paths.py +19 -0
- autocoder/terminal/utils/shell.py +43 -0
- autocoder/terminal_v3/__init__.py +10 -0
- autocoder/terminal_v3/app.py +201 -0
- autocoder/terminal_v3/handlers/__init__.py +5 -0
- autocoder/terminal_v3/handlers/command_handler.py +131 -0
- autocoder/terminal_v3/models/__init__.py +6 -0
- autocoder/terminal_v3/models/conversation_buffer.py +214 -0
- autocoder/terminal_v3/models/message.py +50 -0
- autocoder/terminal_v3/models/tool_display.py +247 -0
- autocoder/terminal_v3/ui/__init__.py +7 -0
- autocoder/terminal_v3/ui/keybindings.py +56 -0
- autocoder/terminal_v3/ui/layout.py +141 -0
- autocoder/terminal_v3/ui/styles.py +43 -0
- autocoder/tsproject/__init__.py +23 -23
- autocoder/utils/auto_coder_utils/chat_stream_out.py +1 -1
- autocoder/utils/llms.py +88 -80
- autocoder/utils/math_utils.py +101 -0
- autocoder/utils/model_provider_selector.py +16 -4
- autocoder/utils/operate_config_api.py +33 -5
- autocoder/utils/thread_utils.py +2 -2
- autocoder/version.py +4 -2
- autocoder/workflow_agents/__init__.py +84 -0
- autocoder/workflow_agents/agent.py +143 -0
- autocoder/workflow_agents/exceptions.py +573 -0
- autocoder/workflow_agents/executor.py +489 -0
- autocoder/workflow_agents/loader.py +737 -0
- autocoder/workflow_agents/runner.py +267 -0
- autocoder/workflow_agents/types.py +172 -0
- autocoder/workflow_agents/utils.py +434 -0
- autocoder/workflow_agents/workflow_manager.py +211 -0
- auto_coder-0.1.400.dist-info/METADATA +0 -396
- auto_coder-0.1.400.dist-info/RECORD +0 -425
- auto_coder-0.1.400.dist-info/licenses/LICENSE +0 -201
- autocoder/auto_coder_server.py +0 -672
- autocoder/benchmark.py +0 -138
- autocoder/common/ac_style_command_parser/example.py +0 -7
- autocoder/common/cleaner.py +0 -31
- autocoder/common/command_completer_v2.py +0 -615
- autocoder/common/directory_cache/__init__.py +0 -1
- autocoder/common/directory_cache/cache.py +0 -192
- autocoder/common/directory_cache/test_cache.py +0 -190
- autocoder/common/file_checkpoint/examples.py +0 -217
- autocoder/common/llm_friendly_package_example.py +0 -138
- autocoder/common/llm_friendly_package_test.py +0 -63
- autocoder/common/pull_requests/test_module.py +0 -1
- autocoder/common/rulefiles/autocoderrules_utils.py +0 -484
- autocoder/common/text.py +0 -30
- autocoder/common/v2/agent/agentic_edit_tools/list_package_info_tool_resolver.py +0 -42
- autocoder/common/v2/agent/agentic_edit_tools/test_execute_command_tool_resolver.py +0 -70
- autocoder/common/v2/agent/agentic_edit_tools/test_search_files_tool_resolver.py +0 -163
- autocoder/common/v2/agent/agentic_tool_display.py +0 -183
- autocoder/plugins/dynamic_completion_example.py +0 -148
- autocoder/plugins/sample_plugin.py +0 -160
- autocoder/sdk/cli/__main__.py +0 -26
- autocoder/sdk/cli/completion_wrapper.py +0 -38
- autocoder/sdk/cli/install_completion.py +0 -301
- autocoder/sdk/models/messages.py +0 -209
- autocoder/sdk/session/__init__.py +0 -32
- autocoder/sdk/session/session.py +0 -106
- autocoder/sdk/session/session_manager.py +0 -56
- {auto_coder-0.1.400.dist-info → auto_coder-2.0.0.dist-info}/top_level.txt +0 -0
- /autocoder/{sdk/example.py → common/agent_query_queue/__init__.py} +0 -0
|
@@ -0,0 +1,746 @@
|
|
|
1
|
+
from typing import List, Dict, Any, Union, Optional
|
|
2
|
+
import json
|
|
3
|
+
import re
|
|
4
|
+
import copy
|
|
5
|
+
import byzerllm
|
|
6
|
+
from autocoder.common.printer import Printer
|
|
7
|
+
from autocoder.common.tokens import count_string_tokens
|
|
8
|
+
from loguru import logger
|
|
9
|
+
from autocoder.common import AutoCoderArgs
|
|
10
|
+
from autocoder.common.autocoderargs_parser import AutoCoderArgsParser
|
|
11
|
+
from autocoder.common.save_formatted_log import save_formatted_log
|
|
12
|
+
from autocoder.common.wrap_llm_hint.utils import merge_with_last_user_message
|
|
13
|
+
from autocoder.common.conversations.get_conversation_manager import get_conversation_manager
|
|
14
|
+
from .tool_content_detector import ToolContentDetector
|
|
15
|
+
from .conversation_message_ids_api import get_conversation_message_ids_api
|
|
16
|
+
from .conversation_message_ids_pruner import ConversationMessageIdsPruner
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class AgenticConversationPruner:
|
|
20
|
+
"""
|
|
21
|
+
Specialized conversation pruner for agentic conversations that cleans up tool outputs.
|
|
22
|
+
|
|
23
|
+
This pruner specifically targets tool result messages (role='user', content contains '<tool_result>')
|
|
24
|
+
and replaces their content with a placeholder message to reduce token usage while maintaining
|
|
25
|
+
conversation flow.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
def __init__(self, args: AutoCoderArgs, llm: Union[byzerllm.ByzerLLM, byzerllm.SimpleByzerLLM, None], conversation_id: Optional[str] = None):
    """
    Set up a pruner bound to one conversation.

    Args:
        args: Configuration object; stored and read later by the pruning methods
        llm: LLM handle (may be None); only stored here
        conversation_id: ID of the conversation being pruned; must not be None

    Raises:
        ValueError: If conversation_id is None
    """
    # Fail fast: every later step keys its lookups and logs on this ID.
    if conversation_id is None:
        raise ValueError("conversation_id is required in AgenticConversationPruner")

    self.args = args
    self.llm = llm
    self.conversation_id = conversation_id
    self.printer = Printer()

    # Text placed in the <content> element of a cleared tool result.
    self.replacement_message = "This message has been cleared. If you still want to get this information, you can call the tool again to retrieve it."

    # Parser used to resolve the conversation_prune_safe_zone_tokens setting.
    self.args_parser = AutoCoderArgsParser()

    # Detector used to find and shrink tool-call content in assistant messages.
    self.tool_content_detector = ToolContentDetector(
        replacement_message="Content cleared to save tokens"
    )

    # Collaborators for message-ID based pruning.
    self.message_ids_api = get_conversation_message_ids_api()
    self.message_ids_pruner = ConversationMessageIdsPruner()

    # Counters filled in by the pruning methods and read by the statistics getters.
    self.pruning_stats = dict(
        range_pruning_applied=False,
        range_pruning_success=False,
        original_length=0,
        after_range_pruning=0,
        after_tool_cleanup=0,
        total_compression_ratio=1.0,
    )
|
|
59
|
+
|
|
60
|
+
def _get_current_conversation_id(self) -> str:
|
|
61
|
+
"""
|
|
62
|
+
Get the current conversation ID from the constructor parameter.
|
|
63
|
+
|
|
64
|
+
Returns:
|
|
65
|
+
Current conversation ID (guaranteed to be not None)
|
|
66
|
+
"""
|
|
67
|
+
return self.conversation_id
|
|
68
|
+
|
|
69
|
+
def _get_parsed_safe_zone_tokens(self) -> int:
|
|
70
|
+
"""
|
|
71
|
+
解析 conversation_prune_safe_zone_tokens 参数,支持多种格式
|
|
72
|
+
|
|
73
|
+
Returns:
|
|
74
|
+
解析后的 token 数量
|
|
75
|
+
"""
|
|
76
|
+
# 添加调试信息
|
|
77
|
+
raw_value = self.args.conversation_prune_safe_zone_tokens
|
|
78
|
+
code_model = self.args.code_model or self.args.model
|
|
79
|
+
|
|
80
|
+
result = self.args_parser.parse_conversation_prune_safe_zone_tokens(
|
|
81
|
+
raw_value,
|
|
82
|
+
code_model
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
# 防护逻辑:如果结果为 0,使用默认值
|
|
86
|
+
if result == 0:
|
|
87
|
+
default_value = 50 * 1024
|
|
88
|
+
print(f"[WARNING] conversation_prune_safe_zone_tokens 为 0,使用默认值: {default_value}")
|
|
89
|
+
return default_value
|
|
90
|
+
|
|
91
|
+
return result
|
|
92
|
+
|
|
93
|
+
def prune_conversations(self, conversations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Shrink a conversation until it fits the configured safe-zone token budget.

    Token usage is measured on the JSON serialization of the whole list.
    The steps are:

    1. If the conversation already fits, return the input list unchanged.
    2. Apply message-ID based pruning.
    3. If still over budget, clean up tool results and tool-call content.
    4. If still over budget, merge a hint into the conversation asking the
       LLM to use the conversation_message_ids_write tool.
    5. Save the resulting conversation through save_formatted_log.

    self.pruning_stats is refreshed on every call, including the
    range-pruning flags, so statistics never leak from an earlier run.

    Args:
        conversations: Original conversation list

    Returns:
        Pruned conversation list (the very same list object when no pruning
        was necessary)
    """
    safe_zone_tokens = self._get_parsed_safe_zone_tokens()
    original_length = len(conversations)

    # Reset per-run statistics. The range-pruning flags are only ever set to
    # True further down, so without this reset a reused pruner would keep
    # reporting the outcome of a previous call.
    self.pruning_stats.update({
        "range_pruning_applied": False,
        "range_pruning_success": False,
        "original_length": original_length
    })

    current_tokens = count_string_tokens(
        json.dumps(conversations, ensure_ascii=False))

    if current_tokens <= safe_zone_tokens:
        # Nothing to do: record a no-op run and return the input untouched.
        self.pruning_stats.update({
            "after_range_pruning": original_length,
            "after_tool_cleanup": original_length,
            "total_compression_ratio": 1.0
        })
        return conversations

    # Step 1: message-ID based pruning (falls back to the original list
    # when no message-ID configuration exists or pruning fails).
    processed_conversations = self._apply_message_ids_pruning(
        conversations)
    logger.info(
        f"After Message IDs pruning: {len(conversations)} -> {len(processed_conversations)} messages")

    current_tokens = count_string_tokens(json.dumps(
        processed_conversations, ensure_ascii=False))

    # Step 2: tool cleanup, only when message-ID pruning was not enough.
    if current_tokens > safe_zone_tokens:
        config = {"safe_zone_tokens": safe_zone_tokens}
        processed_conversations = self._unified_tool_cleanup_prune(
            processed_conversations, config)
        # Contents changed, so the token count has to be measured again.
        current_tokens = count_string_tokens(json.dumps(
            processed_conversations, ensure_ascii=False))

    # Final statistics are message counts, not token counts.
    final_length = len(processed_conversations)
    self.pruning_stats["after_tool_cleanup"] = final_length
    self.pruning_stats["total_compression_ratio"] = (
        final_length / original_length if original_length > 0 else 1.0
    )

    logger.info(f"Complete pruning: {original_length} -> {final_length} messages "
                f"(total compression: {self.pruning_stats['total_compression_ratio']:.2%})")

    # Still too long: ask the LLM itself to nominate messages for deletion.
    if current_tokens > safe_zone_tokens:
        cleanup_message = "The conversation is still too long, please use conversation_message_ids_write tool to save the message ids to be deleted."

        # Use standardized hint merging from wrap_llm_hint module
        processed_conversations = merge_with_last_user_message(
            processed_conversations, cleanup_message)

    save_formatted_log(self.args.source_dir, json.dumps(processed_conversations, ensure_ascii=False),
                       "agentic_pruned_conversation", conversation_id=self._get_current_conversation_id())

    return processed_conversations
|
|
167
|
+
|
|
168
|
+
def _apply_message_ids_pruning(self, conversations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Run message-ID based pruning when a configuration exists for this conversation.

    Every exit path records the resulting message count in
    self.pruning_stats["after_range_pruning"].

    Args:
        conversations: Original conversation list

    Returns:
        The pruned conversations on success; otherwise the input list
        unchanged (no conversation ID, no message-ID configuration, a failed
        pruning result, or an exception raised while pruning)
    """
    def keep_original() -> List[Dict[str, Any]]:
        # Shared fallback: nothing was removed, hand back the caller's list.
        self.pruning_stats["after_range_pruning"] = len(conversations)
        return conversations

    conversation_id = self._get_current_conversation_id()
    if not conversation_id:
        logger.debug(
            "No conversation_id provided, skipping message IDs pruning")
        return keep_original()

    # Look up the message-ID configuration stored for this conversation.
    conversation_message_ids = self.message_ids_api.get_conversation_message_ids(
        conversation_id)
    if not conversation_message_ids:
        logger.debug(
            f"No message IDs configuration found for conversation {conversation_id}, skipping message IDs pruning")
        return keep_original()

    logger.info(
        f"Applying message IDs pruning for conversation {conversation_id}")
    self.pruning_stats["range_pruning_applied"] = True

    try:
        pruning_result = self.message_ids_pruner.prune_conversations(
            conversations, conversation_message_ids)
        logger.info(f"Message IDs: {pruning_result}")

        if not pruning_result.success:
            # The pruner reported a failure: log it and continue unpruned.
            logger.error(
                f"Message IDs pruning failed: {pruning_result.error_message}")
            return keep_original()

        self.pruning_stats["range_pruning_success"] = True
        self.pruning_stats["after_range_pruning"] = pruning_result.pruned_length

        logger.info(f"Message IDs pruning completed: {pruning_result.original_length} -> {pruning_result.pruned_length} messages "
                    f"(message IDs compression: {pruning_result.compression_ratio:.2%})")

        # Surface any warnings the pruner attached to its result.
        if pruning_result.warnings:
            for warning in pruning_result.warnings:
                logger.warning(f"Message IDs pruning: {warning}")

        return pruning_result.pruned_conversations

    except Exception as e:
        # Best effort: a pruning error must not break the conversation flow.
        logger.error(
            f"Message IDs pruning failed with exception: {str(e)}")
        return keep_original()
|
|
231
|
+
|
|
232
|
+
def _unified_tool_cleanup_prune(self, conversations: List[Dict[str, Any]],
                                config: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Clean up tool results and tool-call content in a single pass.

    The input list is deep-copied first, so the caller's messages are never
    mutated. Cleanable messages are:

    - user messages whose string content is a tool result
      (see _is_tool_result_message), replaced by a short placeholder;
    - assistant messages whose string content contains a tool call,
      shortened via the tool content detector (max_content_length=500).

    Messages are processed strictly in conversation order (oldest first);
    tool results are not given priority over tool calls. Cleanup stops as
    soon as the token count is within the safe zone, or when cleaning the
    next message would leave fewer than 6 cleanable messages untouched.

    Args:
        conversations: Conversation list to clean
        config: Settings dict; "safe_zone_tokens" is the token budget
            (defaults to 80 * 1024 when absent).
            NOTE(review): this fallback differs from the 50 * 1024 default
            used by _get_parsed_safe_zone_tokens; confirm which is intended.

    Returns:
        A cleaned deep copy of the conversation list
    """
    safe_zone_tokens = config.get("safe_zone_tokens", 80 * 1024)
    processed_conversations = copy.deepcopy(conversations)

    # Measure once up front; the count is refreshed only after a message
    # has actually been modified, instead of on every loop iteration.
    initial_tokens = count_string_tokens(json.dumps(
        processed_conversations, ensure_ascii=False))
    current_tokens = initial_tokens

    # Collect cleanable messages. enumerate() already yields ascending
    # indices, so the resulting list is in conversation order.
    cleanable_messages = []
    for i, conv in enumerate(processed_conversations):
        content = conv.get("content", "")
        role = conv.get("role")

        # Only plain string content is inspected; anything else is skipped.
        if not isinstance(content, str):
            continue

        if role == "user" and self._is_tool_result_message(content):
            cleanable_messages.append({"index": i, "type": "tool_result"})
        elif role == "assistant" and self.tool_content_detector.is_tool_call_content(content):
            cleanable_messages.append({"index": i, "type": "tool_call"})

    tool_result_total = sum(
        1 for m in cleanable_messages if m["type"] == "tool_result")
    tool_call_total = len(cleanable_messages) - tool_result_total
    logger.info(f"Found {tool_result_total} tool result messages "
                f"and {tool_call_total} tool call messages to potentially clean")

    cleaned_count = 0

    for i, message_info in enumerate(cleanable_messages):
        # Stop condition 1: already within the token budget.
        if current_tokens <= safe_zone_tokens:
            logger.info(
                f"Token count ({current_tokens}) is within safe zone ({safe_zone_tokens}), stopping cleanup")
            break

        # Stop condition 2: cleaning this message would leave fewer than
        # 6 cleanable messages untouched (the most recent ones are kept).
        remaining_unpruned = len(cleanable_messages) - (i + 1)
        if remaining_unpruned < 6:
            logger.info(
                f"Less than 6 unpruned messages remaining ({remaining_unpruned}), stopping cleanup")
            break

        msg_index = message_info["index"]
        msg_type = message_info["type"]
        original_content = processed_conversations[msg_index]["content"]
        modified = False

        if msg_type == "tool_result":
            # Replace the whole tool result with a short placeholder.
            tool_name = self._extract_tool_name(original_content)
            replacement_content = self._generate_replacement_message(
                tool_name)
            processed_conversations[msg_index]["content"] = replacement_content
            cleaned_count += 1
            modified = True

            logger.info(f"Cleaned tool result at index {msg_index} (tool: {tool_name}), "
                        f"reduced from {len(original_content)} to {len(replacement_content)} characters")

        elif msg_type == "tool_call":
            # Shorten oversized tool-call content inside the assistant message.
            tool_info = self.tool_content_detector.detect_tool_call(
                original_content)

            if tool_info:
                new_content, replaced = self.tool_content_detector.replace_tool_content(
                    original_content, max_content_length=500
                )

                if replaced:
                    processed_conversations[msg_index]["content"] = new_content
                    cleaned_count += 1
                    modified = True
                    logger.info(f"Cleaned tool call content at index {msg_index} (tool: {tool_info['tool_name']}), "
                                f"reduced from {len(original_content)} to {len(new_content)} characters")

        if modified:
            current_tokens = count_string_tokens(json.dumps(
                processed_conversations, ensure_ascii=False))

    final_tokens = current_tokens
    logger.info(
        f"Unified tool cleanup completed. Cleaned {cleaned_count} messages. Token count: {initial_tokens} -> {final_tokens}")

    return processed_conversations
|
|
339
|
+
|
|
340
|
+
def _is_tool_result_message(self, content: str) -> bool:
|
|
341
|
+
"""
|
|
342
|
+
Check if a message content contains tool result XML.
|
|
343
|
+
|
|
344
|
+
Args:
|
|
345
|
+
content: Message content to check
|
|
346
|
+
|
|
347
|
+
Returns:
|
|
348
|
+
True if content contains tool result format
|
|
349
|
+
"""
|
|
350
|
+
if content is None:
|
|
351
|
+
return False
|
|
352
|
+
return "<tool_result" in content and "tool_name=" in content
|
|
353
|
+
|
|
354
|
+
def _extract_tool_name(self, content: str) -> str:
|
|
355
|
+
"""
|
|
356
|
+
Extract tool name from tool result XML content.
|
|
357
|
+
|
|
358
|
+
Args:
|
|
359
|
+
content: Tool result XML content
|
|
360
|
+
|
|
361
|
+
Returns:
|
|
362
|
+
Tool name or 'unknown' if not found
|
|
363
|
+
"""
|
|
364
|
+
# Pattern to match: <tool_result tool_name='...' or <tool_result tool_name="..."
|
|
365
|
+
pattern = r"<tool_result[^>]*tool_name=['\"]([^'\"]*)['\"]"
|
|
366
|
+
match = re.search(pattern, content)
|
|
367
|
+
|
|
368
|
+
if match:
|
|
369
|
+
return match.group(1)
|
|
370
|
+
return "unknown"
|
|
371
|
+
|
|
372
|
+
def _generate_replacement_message(self, tool_name: str) -> str:
|
|
373
|
+
"""
|
|
374
|
+
Generate a replacement message for a cleaned tool result.
|
|
375
|
+
|
|
376
|
+
Args:
|
|
377
|
+
tool_name: Name of the tool that was called
|
|
378
|
+
|
|
379
|
+
Returns:
|
|
380
|
+
Replacement message string
|
|
381
|
+
"""
|
|
382
|
+
if tool_name and tool_name != "unknown":
|
|
383
|
+
return (f"<tool_result tool_name='{tool_name}' success='true'>"
|
|
384
|
+
f"<message>Content cleared to save tokens</message>"
|
|
385
|
+
f"<content>{self.replacement_message}</content>"
|
|
386
|
+
f"</tool_result>")
|
|
387
|
+
else:
|
|
388
|
+
return f"<tool_result success='true'><message>[Content cleared to save tokens, you can call the tool again to get the tool result.]</message><content>{self.replacement_message}</content></tool_result>"
|
|
389
|
+
|
|
390
|
+
def get_cleanup_statistics(self, original_conversations: List[Dict[str, Any]],
                           pruned_conversations: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Summarize what a cleanup run changed.

    Messages are compared pairwise by position; a pair whose contents differ
    is classified by the role and content of the original message.

    Args:
        original_conversations: Original conversation list
        pruned_conversations: Pruned conversation list

    Returns:
        Dictionary with token counts, tokens saved, compression ratio,
        the number of cleaned tool results / tool calls and the original
        message count
    """
    original_tokens = count_string_tokens(
        json.dumps(original_conversations, ensure_ascii=False))
    pruned_tokens = count_string_tokens(
        json.dumps(pruned_conversations, ensure_ascii=False))

    tool_results_cleaned = 0
    tool_calls_cleaned = 0

    for before, after in zip(original_conversations, pruned_conversations):
        # Untouched messages do not contribute to either counter.
        if before.get("content") == after.get("content"):
            continue

        role = before.get("role")
        if role == "user" and self._is_tool_result_message(before.get("content", "")):
            # A tool result (user role) was replaced.
            tool_results_cleaned += 1
        elif role == "assistant" and self.tool_content_detector.is_tool_call_content(before.get("content", "")):
            # Tool-call content (assistant role) was shortened.
            tool_calls_cleaned += 1

    if original_tokens > 0:
        compression_ratio = pruned_tokens / original_tokens
    else:
        compression_ratio = 1.0

    return {
        "original_tokens": original_tokens,
        "pruned_tokens": pruned_tokens,
        "tokens_saved": original_tokens - pruned_tokens,
        "compression_ratio": compression_ratio,
        "tool_results_cleaned": tool_results_cleaned,
        "tool_calls_cleaned": tool_calls_cleaned,
        "total_messages": len(original_conversations)
    }
|
|
432
|
+
|
|
433
|
+
def get_pruning_statistics(self) -> Dict[str, Any]:
    """
    Report the statistics recorded in self.pruning_stats.

    All counts are message counts. Ratios fall back to 1.0 when their
    denominator is not positive.

    Returns:
        Dictionary with range-pruning flags, message counts per stage,
        compression ratios and the number of messages removed per stage
    """
    stats = self.pruning_stats

    range_pruning = {
        "applied": stats["range_pruning_applied"],
        "success": stats["range_pruning_success"],
        "conversation_id": self._get_current_conversation_id()
    }

    original = stats["original_length"]
    after_range = stats["after_range_pruning"]
    after_cleanup = stats["after_tool_cleanup"]

    if original > 0:
        range_ratio = after_range / original
    else:
        range_ratio = 1.0

    if after_range > 0:
        cleanup_ratio = after_cleanup / after_range
    else:
        cleanup_ratio = 1.0

    return {
        "range_pruning": range_pruning,
        "message_counts": {
            "original": original,
            "after_range_pruning": after_range,
            "after_tool_cleanup": after_cleanup
        },
        "compression": {
            "range_pruning_ratio": range_ratio,
            "tool_cleanup_ratio": cleanup_ratio,
            "total_compression_ratio": stats["total_compression_ratio"]
        },
        "messages_removed": {
            "by_range_pruning": original - after_range,
            "by_tool_cleanup": after_range - after_cleanup,
            "total_removed": original - after_cleanup
        }
    }
|
|
479
|
+
|
|
480
|
+
def _compare_and_log_conversations(self, original_conversations: List[Dict[str, Any]],
                                   pruned_conversations: List[Dict[str, Any]]) -> None:
    """
    Compare the conversations before and after pruning and log the outcome.

    A summary is written to the logger and the full comparison report is
    saved through save_formatted_log. Any error raised along the way is
    logged and swallowed, so this method never interrupts pruning.

    Args:
        original_conversations: Conversation list before pruning
        pruned_conversations: Conversation list after pruning
    """
    try:
        # Build the comparison report.
        report = self._generate_comparison_report(
            original_conversations, pruned_conversations)

        # Summary lines: message counts, compression ratios, tokens saved.
        logger.info("=== 对话裁剪前后对比分析 ===")
        logger.info(
            f"原始对话数量: {report['message_counts']['original']}")
        logger.info(
            f"裁剪后对话数量: {report['message_counts']['final']}")
        logger.info(
            f"删除的对话数量: {report['message_counts']['removed']}")
        logger.info(
            f"消息压缩比: {report['compression']['message_compression_ratio']:.2%}")
        logger.info(
            f"Token压缩比: {report['compression']['token_compression_ratio']:.2%}")
        logger.info(f"Token节省数量: {report['tokens']['saved']:,}")

        # Per-category change counts are only logged when non-zero.
        if report['changes']['tool_results_modified'] > 0:
            logger.info(
                f"工具结果消息清理数量: {report['changes']['tool_results_modified']}")

        if report['changes']['tool_calls_modified'] > 0:
            logger.info(
                f"工具调用内容清理数量: {report['changes']['tool_calls_modified']}")

        if report['changes']['messages_removed_by_ids'] > 0:
            logger.info(
                f"基于消息ID删除的消息数量: {report['changes']['messages_removed_by_ids']}")

        # Persist the detailed report to the formatted log.
        serialized_report = json.dumps(report, ensure_ascii=False, indent=2)
        save_formatted_log(
            self.args.source_dir,
            serialized_report,
            "conversation_comparison_report",
            conversation_id=self._get_current_conversation_id()
        )

    except Exception as e:
        logger.error(f"生成对话对比报告时出错: {str(e)}")
        logger.exception(e)
|
|
531
|
+
|
|
532
|
+
def _generate_comparison_report(self, original_conversations: List[Dict[str, Any]],
                                pruned_conversations: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Build a detailed before/after comparison report for a pruning run.

    Token figures are measured on the JSON serialization of each list.
    The range-pruning figures come from self.pruning_stats, i.e. from the
    most recent prune_conversations run, not from the two lists passed in.

    Args:
        original_conversations: Conversation list before pruning
        pruned_conversations: Conversation list after pruning

    Returns:
        Dictionary with the timestamp, conversation ID, pruning strategy,
        message counts, token counts, compression ratios, change counts,
        message distributions, per-message change details and
        effectiveness figures
    """
    # Local import keeps this block self-contained without touching the
    # module's import section.
    from datetime import datetime

    # Basic message counts.
    original_count = len(original_conversations)
    pruned_count = len(pruned_conversations)
    removed_count = original_count - pruned_count

    # Token statistics.
    original_tokens = count_string_tokens(
        json.dumps(original_conversations, ensure_ascii=False))
    pruned_tokens = count_string_tokens(
        json.dumps(pruned_conversations, ensure_ascii=False))
    tokens_saved = original_tokens - pruned_tokens

    # Per-message change analysis.
    changes_analysis = self._analyze_conversation_changes(
        original_conversations, pruned_conversations)

    # Role / message-type distributions before and after.
    original_distribution = self._analyze_message_distribution(
        original_conversations)
    pruned_distribution = self._analyze_message_distribution(
        pruned_conversations)

    # Resolve these once instead of re-parsing / re-looking them up for
    # every field that needs them.
    safe_zone_tokens = self._get_parsed_safe_zone_tokens()
    after_range_pruning = self.pruning_stats.get(
        "after_range_pruning", original_count)

    report = {
        "timestamp": str(datetime.now()),
        "conversation_id": self._get_current_conversation_id(),
        "pruning_strategy": {
            "range_pruning_applied": self.pruning_stats["range_pruning_applied"],
            # NOTE(review): reported as True unconditionally, even when the
            # tool cleanup step was skipped.
            "tool_cleanup_applied": True,
            "safe_zone_tokens": safe_zone_tokens
        },
        "message_counts": {
            "original": original_count,
            "final": pruned_count,
            "removed": removed_count,
            "after_range_pruning": after_range_pruning
        },
        "tokens": {
            "original": original_tokens,
            "final": pruned_tokens,
            "saved": tokens_saved,
            "safe_zone_limit": safe_zone_tokens
        },
        "compression": {
            "message_compression_ratio": pruned_count / original_count if original_count > 0 else 1.0,
            "token_compression_ratio": pruned_tokens / original_tokens if original_tokens > 0 else 1.0,
            "range_pruning_compression": (
                after_range_pruning / original_count
                if original_count > 0 else 1.0
            ),
            "tool_cleanup_compression": (
                pruned_count / after_range_pruning
                if after_range_pruning > 0 else 1.0
            )
        },
        "changes": {
            "messages_removed_by_ids": original_count - after_range_pruning,
            "tool_results_modified": changes_analysis["tool_results_modified"],
            "tool_calls_modified": changes_analysis["tool_calls_modified"],
            "content_modifications": changes_analysis["content_modifications"],
            "unchanged_messages": changes_analysis["unchanged_messages"]
        },
        "message_distribution": {
            "original": original_distribution,
            "pruned": pruned_distribution
        },
        "detailed_changes": changes_analysis["detailed_changes"],
        "pruning_effectiveness": {
            "tokens_per_message_before": original_tokens / original_count if original_count > 0 else 0,
            "tokens_per_message_after": pruned_tokens / pruned_count if pruned_count > 0 else 0,
            "average_token_reduction_per_message": tokens_saved / original_count if original_count > 0 else 0,
            "within_safe_zone": pruned_tokens <= safe_zone_tokens
        }
    }

    return report
|
|
627
|
+
|
|
628
|
+
def _analyze_conversation_changes(self, original_conversations: List[Dict[str, Any]],
|
|
629
|
+
pruned_conversations: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
630
|
+
"""
|
|
631
|
+
分析对话变化的详细信息。
|
|
632
|
+
|
|
633
|
+
Args:
|
|
634
|
+
original_conversations: 原始对话列表
|
|
635
|
+
pruned_conversations: 裁剪后对话列表
|
|
636
|
+
|
|
637
|
+
Returns:
|
|
638
|
+
包含变化分析的字典
|
|
639
|
+
"""
|
|
640
|
+
tool_results_modified = 0
|
|
641
|
+
tool_calls_modified = 0
|
|
642
|
+
content_modifications = 0
|
|
643
|
+
unchanged_messages = 0
|
|
644
|
+
detailed_changes = []
|
|
645
|
+
|
|
646
|
+
# 创建一个映射来匹配原始和裁剪后的消息
|
|
647
|
+
min_length = min(len(original_conversations),
|
|
648
|
+
len(pruned_conversations))
|
|
649
|
+
|
|
650
|
+
for i in range(min_length):
|
|
651
|
+
original_msg = original_conversations[i]
|
|
652
|
+
pruned_msg = pruned_conversations[i]
|
|
653
|
+
|
|
654
|
+
original_content = original_msg.get("content", "")
|
|
655
|
+
pruned_content = pruned_msg.get("content", "")
|
|
656
|
+
|
|
657
|
+
if original_content != pruned_content:
|
|
658
|
+
content_modifications += 1
|
|
659
|
+
|
|
660
|
+
# 分析修改类型
|
|
661
|
+
change_type = "content_modified"
|
|
662
|
+
tool_name = None
|
|
663
|
+
|
|
664
|
+
if (original_msg.get("role") == "user" and
|
|
665
|
+
self._is_tool_result_message(original_content)):
|
|
666
|
+
tool_results_modified += 1
|
|
667
|
+
change_type = "tool_result_cleaned"
|
|
668
|
+
tool_name = self._extract_tool_name(original_content)
|
|
669
|
+
|
|
670
|
+
elif (original_msg.get("role") == "assistant" and
|
|
671
|
+
self.tool_content_detector.is_tool_call_content(original_content)):
|
|
672
|
+
tool_calls_modified += 1
|
|
673
|
+
change_type = "tool_call_cleaned"
|
|
674
|
+
tool_info = self.tool_content_detector.detect_tool_call(
|
|
675
|
+
original_content)
|
|
676
|
+
tool_name = tool_info.get(
|
|
677
|
+
"tool_name") if tool_info else "unknown"
|
|
678
|
+
|
|
679
|
+
detailed_changes.append({
|
|
680
|
+
"message_index": i,
|
|
681
|
+
"role": original_msg.get("role"),
|
|
682
|
+
"change_type": change_type,
|
|
683
|
+
"tool_name": tool_name,
|
|
684
|
+
"original_length": len(original_content),
|
|
685
|
+
"pruned_length": len(pruned_content),
|
|
686
|
+
"size_reduction": len(original_content) - len(pruned_content)
|
|
687
|
+
})
|
|
688
|
+
else:
|
|
689
|
+
unchanged_messages += 1
|
|
690
|
+
|
|
691
|
+
return {
|
|
692
|
+
"tool_results_modified": tool_results_modified,
|
|
693
|
+
"tool_calls_modified": tool_calls_modified,
|
|
694
|
+
"content_modifications": content_modifications,
|
|
695
|
+
"unchanged_messages": unchanged_messages,
|
|
696
|
+
"detailed_changes": detailed_changes
|
|
697
|
+
}
|
|
698
|
+
|
|
699
|
+
def _analyze_message_distribution(self, conversations: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
700
|
+
"""
|
|
701
|
+
分析消息的角色分布和类型分布。
|
|
702
|
+
|
|
703
|
+
Args:
|
|
704
|
+
conversations: 对话列表
|
|
705
|
+
|
|
706
|
+
Returns:
|
|
707
|
+
包含分布信息的字典
|
|
708
|
+
"""
|
|
709
|
+
role_counts = {"user": 0, "assistant": 0, "system": 0, "other": 0}
|
|
710
|
+
message_types = {
|
|
711
|
+
"tool_result": 0,
|
|
712
|
+
"tool_call": 0,
|
|
713
|
+
"regular_user": 0,
|
|
714
|
+
"regular_assistant": 0,
|
|
715
|
+
"system": 0
|
|
716
|
+
}
|
|
717
|
+
|
|
718
|
+
for msg in conversations:
|
|
719
|
+
role = msg.get("role", "other")
|
|
720
|
+
content = msg.get("content", "")
|
|
721
|
+
|
|
722
|
+
# 统计角色分布
|
|
723
|
+
if role in role_counts:
|
|
724
|
+
role_counts[role] += 1
|
|
725
|
+
else:
|
|
726
|
+
role_counts["other"] += 1
|
|
727
|
+
|
|
728
|
+
# 统计消息类型分布
|
|
729
|
+
if role == "system":
|
|
730
|
+
message_types["system"] += 1
|
|
731
|
+
elif role == "user":
|
|
732
|
+
if self._is_tool_result_message(content):
|
|
733
|
+
message_types["tool_result"] += 1
|
|
734
|
+
else:
|
|
735
|
+
message_types["regular_user"] += 1
|
|
736
|
+
elif role == "assistant":
|
|
737
|
+
if self.tool_content_detector.is_tool_call_content(content):
|
|
738
|
+
message_types["tool_call"] += 1
|
|
739
|
+
else:
|
|
740
|
+
message_types["regular_assistant"] += 1
|
|
741
|
+
|
|
742
|
+
return {
|
|
743
|
+
"total_messages": len(conversations),
|
|
744
|
+
"role_distribution": role_counts,
|
|
745
|
+
"message_type_distribution": message_types
|
|
746
|
+
}
|