auto-coder 1.0.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of auto-coder might be problematic. Click here for more details.

Files changed (574)
  1. auto_coder-2.0.0.dist-info/LICENSE +158 -0
  2. auto_coder-2.0.0.dist-info/METADATA +558 -0
  3. auto_coder-2.0.0.dist-info/RECORD +795 -0
  4. {auto_coder-1.0.0.dist-info → auto_coder-2.0.0.dist-info}/WHEEL +1 -1
  5. {auto_coder-1.0.0.dist-info → auto_coder-2.0.0.dist-info}/entry_points.txt +3 -3
  6. autocoder/__init__.py +31 -0
  7. autocoder/agent/auto_filegroup.py +32 -13
  8. autocoder/agent/auto_learn_from_commit.py +9 -1
  9. autocoder/agent/base_agentic/__init__.py +3 -0
  10. autocoder/agent/base_agentic/agent_hub.py +1 -1
  11. autocoder/agent/base_agentic/base_agent.py +235 -136
  12. autocoder/agent/base_agentic/default_tools.py +119 -118
  13. autocoder/agent/base_agentic/test_base_agent.py +1 -1
  14. autocoder/agent/base_agentic/tool_registry.py +32 -20
  15. autocoder/agent/base_agentic/tools/read_file_tool_resolver.py +24 -3
  16. autocoder/agent/base_agentic/tools/write_to_file_tool_resolver.py +24 -11
  17. autocoder/agent/base_agentic/types.py +42 -0
  18. autocoder/agent/entry_command_agent/chat.py +73 -59
  19. autocoder/auto_coder.py +31 -40
  20. autocoder/auto_coder_rag.py +11 -1084
  21. autocoder/auto_coder_runner.py +970 -2345
  22. autocoder/auto_coder_terminal.py +26 -0
  23. autocoder/auto_coder_terminal_v3.py +190 -0
  24. autocoder/chat/conf_command.py +224 -124
  25. autocoder/chat/models_command.py +361 -299
  26. autocoder/chat/rules_command.py +79 -31
  27. autocoder/chat_auto_coder.py +988 -398
  28. autocoder/chat_auto_coder_lang.py +23 -732
  29. autocoder/commands/auto_command.py +25 -8
  30. autocoder/commands/auto_web.py +1 -1
  31. autocoder/commands/tools.py +44 -44
  32. autocoder/common/__init__.py +150 -128
  33. autocoder/common/ac_style_command_parser/__init__.py +39 -2
  34. autocoder/common/ac_style_command_parser/config.py +422 -0
  35. autocoder/common/ac_style_command_parser/parser.py +292 -78
  36. autocoder/common/ac_style_command_parser/test_parser.py +241 -16
  37. autocoder/common/ac_style_command_parser/test_typed_parser.py +342 -0
  38. autocoder/common/ac_style_command_parser/typed_parser.py +653 -0
  39. autocoder/common/action_yml_file_manager.py +25 -13
  40. autocoder/common/agent_events/__init__.py +52 -0
  41. autocoder/common/agent_events/agent_event_emitter.py +193 -0
  42. autocoder/common/agent_events/event_factory.py +177 -0
  43. autocoder/common/agent_events/examples.py +307 -0
  44. autocoder/common/agent_events/types.py +113 -0
  45. autocoder/common/agent_events/utils.py +68 -0
  46. autocoder/common/agent_hooks/__init__.py +44 -0
  47. autocoder/common/agent_hooks/examples.py +582 -0
  48. autocoder/common/agent_hooks/hook_executor.py +217 -0
  49. autocoder/common/agent_hooks/hook_manager.py +288 -0
  50. autocoder/common/agent_hooks/types.py +133 -0
  51. autocoder/common/agent_hooks/utils.py +99 -0
  52. autocoder/common/agent_query_queue/queue_executor.py +324 -0
  53. autocoder/common/agent_query_queue/queue_manager.py +325 -0
  54. autocoder/common/agents/__init__.py +11 -0
  55. autocoder/common/agents/agent_manager.py +323 -0
  56. autocoder/common/agents/agent_parser.py +189 -0
  57. autocoder/common/agents/example_usage.py +344 -0
  58. autocoder/common/agents/integration_example.py +330 -0
  59. autocoder/common/agents/test_agent_parser.py +545 -0
  60. autocoder/common/async_utils.py +101 -0
  61. autocoder/common/auto_coder_lang.py +23 -972
  62. autocoder/common/autocoderargs_parser/__init__.py +14 -0
  63. autocoder/common/autocoderargs_parser/parser.py +184 -0
  64. autocoder/common/autocoderargs_parser/tests/__init__.py +1 -0
  65. autocoder/common/autocoderargs_parser/tests/test_args_parser.py +235 -0
  66. autocoder/common/autocoderargs_parser/tests/test_token_parser.py +195 -0
  67. autocoder/common/autocoderargs_parser/token_parser.py +290 -0
  68. autocoder/common/buildin_tokenizer.py +2 -4
  69. autocoder/common/code_auto_generate.py +149 -74
  70. autocoder/common/code_auto_generate_diff.py +163 -70
  71. autocoder/common/code_auto_generate_editblock.py +179 -89
  72. autocoder/common/code_auto_generate_strict_diff.py +167 -72
  73. autocoder/common/code_auto_merge_editblock.py +13 -6
  74. autocoder/common/code_modification_ranker.py +1 -1
  75. autocoder/common/command_completer.py +3 -3
  76. autocoder/common/command_file_manager/manager.py +183 -47
  77. autocoder/common/command_file_manager/test_command_file_manager.py +507 -0
  78. autocoder/common/command_templates.py +1 -1
  79. autocoder/common/conf_utils.py +2 -4
  80. autocoder/common/conversations/config.py +11 -3
  81. autocoder/common/conversations/get_conversation_manager.py +100 -2
  82. autocoder/common/conversations/llm_stats_models.py +264 -0
  83. autocoder/common/conversations/manager.py +112 -28
  84. autocoder/common/conversations/models.py +16 -2
  85. autocoder/common/conversations/storage/index_manager.py +134 -10
  86. autocoder/common/core_config/__init__.py +63 -0
  87. autocoder/common/core_config/agentic_mode_manager.py +109 -0
  88. autocoder/common/core_config/base_manager.py +123 -0
  89. autocoder/common/core_config/compatibility.py +151 -0
  90. autocoder/common/core_config/config_manager.py +156 -0
  91. autocoder/common/core_config/conversation_manager.py +31 -0
  92. autocoder/common/core_config/exclude_manager.py +72 -0
  93. autocoder/common/core_config/file_manager.py +177 -0
  94. autocoder/common/core_config/human_as_model_manager.py +129 -0
  95. autocoder/common/core_config/lib_manager.py +54 -0
  96. autocoder/common/core_config/main_manager.py +81 -0
  97. autocoder/common/core_config/mode_manager.py +126 -0
  98. autocoder/common/core_config/models.py +70 -0
  99. autocoder/common/core_config/test_memory_manager.py +1056 -0
  100. autocoder/common/env_manager.py +282 -0
  101. autocoder/common/env_manager_usage_example.py +211 -0
  102. autocoder/common/file_checkpoint/conversation_checkpoint.py +19 -19
  103. autocoder/common/file_checkpoint/manager.py +264 -48
  104. autocoder/common/file_checkpoint/test_backup.py +1 -18
  105. autocoder/common/file_checkpoint/test_manager.py +270 -1
  106. autocoder/common/file_checkpoint/test_store.py +1 -17
  107. autocoder/common/file_handler/__init__.py +23 -0
  108. autocoder/common/file_handler/active_context_handler.py +159 -0
  109. autocoder/common/file_handler/add_files_handler.py +409 -0
  110. autocoder/common/file_handler/chat_handler.py +180 -0
  111. autocoder/common/file_handler/coding_handler.py +401 -0
  112. autocoder/common/file_handler/commit_handler.py +200 -0
  113. autocoder/common/file_handler/lib_handler.py +156 -0
  114. autocoder/common/file_handler/list_files_handler.py +111 -0
  115. autocoder/common/file_handler/mcp_handler.py +268 -0
  116. autocoder/common/file_handler/models_handler.py +493 -0
  117. autocoder/common/file_handler/remove_files_handler.py +172 -0
  118. autocoder/common/git_utils.py +44 -8
  119. autocoder/common/global_cancel.py +15 -6
  120. autocoder/common/ignorefiles/test_ignore_file_utils.py +1 -1
  121. autocoder/common/international/__init__.py +31 -0
  122. autocoder/common/international/demo_international.py +92 -0
  123. autocoder/common/international/message_manager.py +157 -0
  124. autocoder/common/international/messages/__init__.py +56 -0
  125. autocoder/common/international/messages/async_command_messages.py +507 -0
  126. autocoder/common/international/messages/auto_coder_messages.py +2208 -0
  127. autocoder/common/international/messages/chat_auto_coder_messages.py +1547 -0
  128. autocoder/common/international/messages/command_help_messages.py +986 -0
  129. autocoder/common/international/messages/conversation_command_messages.py +191 -0
  130. autocoder/common/international/messages/git_helper_plugin_messages.py +159 -0
  131. autocoder/common/international/messages/queue_command_messages.py +751 -0
  132. autocoder/common/international/messages/rules_command_messages.py +77 -0
  133. autocoder/common/international/messages/sdk_messages.py +1707 -0
  134. autocoder/common/international/messages/token_helper_plugin_messages.py +361 -0
  135. autocoder/common/international/messages/tool_display_messages.py +1212 -0
  136. autocoder/common/international/messages/workflow_exception_messages.py +473 -0
  137. autocoder/common/international/test_international.py +612 -0
  138. autocoder/common/linter_core/__init__.py +28 -0
  139. autocoder/common/linter_core/base_linter.py +61 -0
  140. autocoder/common/linter_core/config_loader.py +271 -0
  141. autocoder/common/linter_core/formatters/__init__.py +0 -0
  142. autocoder/common/linter_core/formatters/base_formatter.py +38 -0
  143. autocoder/common/linter_core/formatters/raw_formatter.py +17 -0
  144. autocoder/common/linter_core/linter.py +166 -0
  145. autocoder/common/linter_core/linter_factory.py +216 -0
  146. autocoder/common/linter_core/linter_manager.py +333 -0
  147. autocoder/common/linter_core/linters/__init__.py +9 -0
  148. autocoder/common/linter_core/linters/java_linter.py +342 -0
  149. autocoder/common/linter_core/linters/python_linter.py +115 -0
  150. autocoder/common/linter_core/linters/typescript_linter.py +119 -0
  151. autocoder/common/linter_core/models/__init__.py +7 -0
  152. autocoder/common/linter_core/models/lint_result.py +91 -0
  153. autocoder/common/linter_core/models.py +33 -0
  154. autocoder/common/linter_core/tests/__init__.py +3 -0
  155. autocoder/common/linter_core/tests/test_config_loader.py +323 -0
  156. autocoder/common/linter_core/tests/test_config_loading.py +308 -0
  157. autocoder/common/linter_core/tests/test_factory_manager.py +234 -0
  158. autocoder/common/linter_core/tests/test_formatters.py +147 -0
  159. autocoder/common/linter_core/tests/test_integration.py +317 -0
  160. autocoder/common/linter_core/tests/test_java_linter.py +496 -0
  161. autocoder/common/linter_core/tests/test_linters.py +265 -0
  162. autocoder/common/linter_core/tests/test_models.py +81 -0
  163. autocoder/common/linter_core/tests/verify_config_loading.py +296 -0
  164. autocoder/common/linter_core/tests/verify_fixes.py +183 -0
  165. autocoder/common/llm_friendly_package/__init__.py +31 -0
  166. autocoder/common/llm_friendly_package/base_manager.py +102 -0
  167. autocoder/common/llm_friendly_package/docs_manager.py +121 -0
  168. autocoder/common/llm_friendly_package/library_manager.py +171 -0
  169. autocoder/common/{llm_friendly_package.py → llm_friendly_package/main_manager.py} +204 -231
  170. autocoder/common/llm_friendly_package/models.py +40 -0
  171. autocoder/common/llm_friendly_package/test_llm_friendly_package.py +536 -0
  172. autocoder/common/llms/__init__.py +15 -0
  173. autocoder/common/llms/demo_error_handling.py +85 -0
  174. autocoder/common/llms/factory.py +142 -0
  175. autocoder/common/llms/manager.py +264 -0
  176. autocoder/common/llms/pricing.py +121 -0
  177. autocoder/common/llms/registry.py +288 -0
  178. autocoder/common/llms/schema.py +77 -0
  179. autocoder/common/llms/simple_demo.py +45 -0
  180. autocoder/common/llms/test_quick_model.py +116 -0
  181. autocoder/common/llms/test_remove_functionality.py +182 -0
  182. autocoder/common/llms/tests/__init__.py +1 -0
  183. autocoder/common/llms/tests/test_manager.py +330 -0
  184. autocoder/common/llms/tests/test_registry.py +364 -0
  185. autocoder/common/mcp_tools/__init__.py +62 -0
  186. autocoder/common/{mcp_tools.py → mcp_tools/executor.py} +49 -40
  187. autocoder/common/{mcp_hub.py → mcp_tools/hub.py} +42 -68
  188. autocoder/common/{mcp_server_install.py → mcp_tools/installer.py} +16 -28
  189. autocoder/common/{mcp_server.py → mcp_tools/server.py} +176 -48
  190. autocoder/common/mcp_tools/test_keyboard_interrupt.py +93 -0
  191. autocoder/common/mcp_tools/test_mcp_tools.py +391 -0
  192. autocoder/common/{mcp_server_types.py → mcp_tools/types.py} +121 -48
  193. autocoder/common/mcp_tools/verify_functionality.py +202 -0
  194. autocoder/common/model_speed_tester.py +32 -26
  195. autocoder/common/priority_directory_finder/__init__.py +142 -0
  196. autocoder/common/priority_directory_finder/examples.py +230 -0
  197. autocoder/common/priority_directory_finder/finder.py +283 -0
  198. autocoder/common/priority_directory_finder/models.py +236 -0
  199. autocoder/common/priority_directory_finder/test_priority_directory_finder.py +431 -0
  200. autocoder/common/project_scanner/__init__.py +18 -0
  201. autocoder/common/project_scanner/compat.py +77 -0
  202. autocoder/common/project_scanner/scanner.py +436 -0
  203. autocoder/common/project_tracker/__init__.py +27 -0
  204. autocoder/common/project_tracker/api.py +228 -0
  205. autocoder/common/project_tracker/demo.py +272 -0
  206. autocoder/common/project_tracker/tracker.py +487 -0
  207. autocoder/common/project_tracker/types.py +53 -0
  208. autocoder/common/pruner/__init__.py +67 -0
  209. autocoder/common/pruner/agentic_conversation_pruner.py +651 -102
  210. autocoder/common/pruner/conversation_message_ids_api.py +386 -0
  211. autocoder/common/pruner/conversation_message_ids_manager.py +347 -0
  212. autocoder/common/pruner/conversation_message_ids_pruner.py +473 -0
  213. autocoder/common/pruner/conversation_normalizer.py +347 -0
  214. autocoder/common/pruner/conversation_pruner.py +26 -6
  215. autocoder/common/pruner/test_agentic_conversation_pruner.py +554 -112
  216. autocoder/common/pruner/test_conversation_normalizer.py +502 -0
  217. autocoder/common/pruner/test_tool_content_detector.py +324 -0
  218. autocoder/common/pruner/tool_content_detector.py +227 -0
  219. autocoder/common/pruner/tools/__init__.py +18 -0
  220. autocoder/common/pruner/tools/query_message_ids.py +264 -0
  221. autocoder/common/pruner/tools/test_agentic_pruning_logic.py +432 -0
  222. autocoder/common/pruner/tools/test_message_ids_pruning_only.py +192 -0
  223. autocoder/common/pull_requests/__init__.py +9 -1
  224. autocoder/common/pull_requests/utils.py +122 -1
  225. autocoder/common/rag_manager/rag_manager.py +36 -40
  226. autocoder/common/rulefiles/__init__.py +53 -1
  227. autocoder/common/rulefiles/api.py +250 -0
  228. autocoder/common/rulefiles/core/__init__.py +14 -0
  229. autocoder/common/rulefiles/core/manager.py +241 -0
  230. autocoder/common/rulefiles/core/selector.py +805 -0
  231. autocoder/common/rulefiles/models/__init__.py +20 -0
  232. autocoder/common/rulefiles/models/index.py +16 -0
  233. autocoder/common/rulefiles/models/init_rule.py +18 -0
  234. autocoder/common/rulefiles/models/rule_file.py +18 -0
  235. autocoder/common/rulefiles/models/rule_relevance.py +14 -0
  236. autocoder/common/rulefiles/models/summary.py +16 -0
  237. autocoder/common/rulefiles/test_rulefiles.py +776 -0
  238. autocoder/common/rulefiles/utils/__init__.py +34 -0
  239. autocoder/common/rulefiles/utils/monitor.py +86 -0
  240. autocoder/common/rulefiles/utils/parser.py +230 -0
  241. autocoder/common/save_formatted_log.py +67 -10
  242. autocoder/common/search_replace.py +8 -1
  243. autocoder/common/search_replace_patch/__init__.py +24 -0
  244. autocoder/common/search_replace_patch/base.py +115 -0
  245. autocoder/common/search_replace_patch/manager.py +248 -0
  246. autocoder/common/search_replace_patch/patch_replacer.py +304 -0
  247. autocoder/common/search_replace_patch/similarity_replacer.py +306 -0
  248. autocoder/common/search_replace_patch/string_replacer.py +181 -0
  249. autocoder/common/search_replace_patch/tests/__init__.py +3 -0
  250. autocoder/common/search_replace_patch/tests/run_tests.py +126 -0
  251. autocoder/common/search_replace_patch/tests/test_base.py +188 -0
  252. autocoder/common/search_replace_patch/tests/test_empty_line_insert.py +233 -0
  253. autocoder/common/search_replace_patch/tests/test_integration.py +389 -0
  254. autocoder/common/search_replace_patch/tests/test_manager.py +351 -0
  255. autocoder/common/search_replace_patch/tests/test_patch_replacer.py +316 -0
  256. autocoder/common/search_replace_patch/tests/test_regex_replacer.py +306 -0
  257. autocoder/common/search_replace_patch/tests/test_similarity_replacer.py +384 -0
  258. autocoder/common/shell_commands/__init__.py +197 -0
  259. autocoder/common/shell_commands/background_process_notifier.py +346 -0
  260. autocoder/common/shell_commands/command_executor.py +1127 -0
  261. autocoder/common/shell_commands/error_recovery.py +541 -0
  262. autocoder/common/shell_commands/exceptions.py +120 -0
  263. autocoder/common/shell_commands/interactive_executor.py +476 -0
  264. autocoder/common/shell_commands/interactive_pexpect_process.py +623 -0
  265. autocoder/common/shell_commands/interactive_process.py +744 -0
  266. autocoder/common/shell_commands/interactive_session_manager.py +1014 -0
  267. autocoder/common/shell_commands/monitoring.py +529 -0
  268. autocoder/common/shell_commands/process_cleanup.py +386 -0
  269. autocoder/common/shell_commands/process_manager.py +606 -0
  270. autocoder/common/shell_commands/test_interactive_pexpect_process.py +281 -0
  271. autocoder/common/shell_commands/tests/__init__.py +6 -0
  272. autocoder/common/shell_commands/tests/conftest.py +118 -0
  273. autocoder/common/shell_commands/tests/test_background_process_notifier.py +703 -0
  274. autocoder/common/shell_commands/tests/test_command_executor.py +448 -0
  275. autocoder/common/shell_commands/tests/test_error_recovery.py +305 -0
  276. autocoder/common/shell_commands/tests/test_exceptions.py +299 -0
  277. autocoder/common/shell_commands/tests/test_execute_batch.py +588 -0
  278. autocoder/common/shell_commands/tests/test_indented_batch_commands.py +244 -0
  279. autocoder/common/shell_commands/tests/test_integration.py +664 -0
  280. autocoder/common/shell_commands/tests/test_monitoring.py +546 -0
  281. autocoder/common/shell_commands/tests/test_performance.py +632 -0
  282. autocoder/common/shell_commands/tests/test_process_cleanup.py +397 -0
  283. autocoder/common/shell_commands/tests/test_process_manager.py +606 -0
  284. autocoder/common/shell_commands/tests/test_timeout_config.py +343 -0
  285. autocoder/common/shell_commands/tests/test_timeout_manager.py +520 -0
  286. autocoder/common/shell_commands/timeout_config.py +315 -0
  287. autocoder/common/shell_commands/timeout_manager.py +352 -0
  288. autocoder/common/terminal_paste/__init__.py +14 -0
  289. autocoder/common/terminal_paste/demo.py +145 -0
  290. autocoder/common/terminal_paste/demo_paste_functionality.py +95 -0
  291. autocoder/common/terminal_paste/paste_handler.py +200 -0
  292. autocoder/common/terminal_paste/paste_manager.py +118 -0
  293. autocoder/common/terminal_paste/tests/__init__.py +1 -0
  294. autocoder/common/terminal_paste/tests/test_paste_handler.py +182 -0
  295. autocoder/common/terminal_paste/tests/test_paste_manager.py +126 -0
  296. autocoder/common/terminal_paste/utils.py +163 -0
  297. autocoder/common/test_autocoder_args.py +232 -0
  298. autocoder/common/test_env_manager.py +173 -0
  299. autocoder/common/test_env_manager_integration.py +159 -0
  300. autocoder/common/text_similarity/__init__.py +9 -0
  301. autocoder/common/text_similarity/demo.py +216 -0
  302. autocoder/common/text_similarity/examples.py +266 -0
  303. autocoder/common/text_similarity/test_text_similarity.py +306 -0
  304. autocoder/common/text_similarity/text_similarity.py +194 -0
  305. autocoder/common/text_similarity/utils.py +125 -0
  306. autocoder/common/todos/__init__.py +61 -0
  307. autocoder/common/todos/cache/__init__.py +16 -0
  308. autocoder/common/todos/cache/base_cache.py +89 -0
  309. autocoder/common/todos/cache/cache_manager.py +228 -0
  310. autocoder/common/todos/cache/memory_cache.py +225 -0
  311. autocoder/common/todos/config.py +155 -0
  312. autocoder/common/todos/exceptions.py +35 -0
  313. autocoder/common/todos/get_todo_manager.py +161 -0
  314. autocoder/common/todos/manager.py +537 -0
  315. autocoder/common/todos/models.py +239 -0
  316. autocoder/common/todos/storage/__init__.py +14 -0
  317. autocoder/common/todos/storage/base_storage.py +76 -0
  318. autocoder/common/todos/storage/file_storage.py +278 -0
  319. autocoder/common/tokens/counter.py +24 -2
  320. autocoder/common/tools_manager/__init__.py +17 -0
  321. autocoder/common/tools_manager/examples.py +162 -0
  322. autocoder/common/tools_manager/manager.py +385 -0
  323. autocoder/common/tools_manager/models.py +39 -0
  324. autocoder/common/tools_manager/test_tools_manager.py +303 -0
  325. autocoder/common/tools_manager/utils.py +191 -0
  326. autocoder/common/v2/agent/agentic_callbacks.py +270 -0
  327. autocoder/common/v2/agent/agentic_edit.py +2699 -1856
  328. autocoder/common/v2/agent/agentic_edit_change_manager.py +474 -0
  329. autocoder/common/v2/agent/agentic_edit_tools/__init__.py +35 -1
  330. autocoder/common/v2/agent/agentic_edit_tools/ac_mod_list_tool_resolver.py +279 -0
  331. autocoder/common/v2/agent/agentic_edit_tools/ac_mod_write_tool_resolver.py +10 -1
  332. autocoder/common/v2/agent/agentic_edit_tools/background_task_tool_resolver.py +1167 -0
  333. autocoder/common/v2/agent/agentic_edit_tools/base_tool_resolver.py +2 -2
  334. autocoder/common/v2/agent/agentic_edit_tools/conversation_message_ids_read_tool_resolver.py +214 -0
  335. autocoder/common/v2/agent/agentic_edit_tools/conversation_message_ids_write_tool_resolver.py +299 -0
  336. autocoder/common/v2/agent/agentic_edit_tools/count_tokens_tool_resolver.py +290 -0
  337. autocoder/common/v2/agent/agentic_edit_tools/execute_command_tool_resolver.py +564 -29
  338. autocoder/common/v2/agent/agentic_edit_tools/execute_workflow_tool_resolver.py +485 -0
  339. autocoder/common/v2/agent/agentic_edit_tools/extract_to_text_tool_resolver.py +225 -0
  340. autocoder/common/v2/agent/agentic_edit_tools/lint_report.py +79 -0
  341. autocoder/common/v2/agent/agentic_edit_tools/linter_config_models.py +343 -0
  342. autocoder/common/v2/agent/agentic_edit_tools/linter_enabled_tool_resolver.py +189 -0
  343. autocoder/common/v2/agent/agentic_edit_tools/list_files_tool_resolver.py +169 -101
  344. autocoder/common/v2/agent/agentic_edit_tools/load_extra_document_tool_resolver.py +349 -0
  345. autocoder/common/v2/agent/agentic_edit_tools/read_file_tool_resolver.py +243 -50
  346. autocoder/common/v2/agent/agentic_edit_tools/replace_in_file_tool_resolver.py +667 -147
  347. autocoder/common/v2/agent/agentic_edit_tools/run_named_subagents_tool_resolver.py +691 -0
  348. autocoder/common/v2/agent/agentic_edit_tools/search_files_tool_resolver.py +410 -86
  349. autocoder/common/v2/agent/agentic_edit_tools/session_interactive_tool_resolver.py +115 -0
  350. autocoder/common/v2/agent/agentic_edit_tools/session_start_tool_resolver.py +190 -0
  351. autocoder/common/v2/agent/agentic_edit_tools/session_stop_tool_resolver.py +76 -0
  352. autocoder/common/v2/agent/agentic_edit_tools/test_write_to_file_tool_resolver.py +207 -192
  353. autocoder/common/v2/agent/agentic_edit_tools/todo_read_tool_resolver.py +80 -63
  354. autocoder/common/v2/agent/agentic_edit_tools/todo_write_tool_resolver.py +237 -233
  355. autocoder/common/v2/agent/agentic_edit_tools/use_mcp_tool_resolver.py +2 -2
  356. autocoder/common/v2/agent/agentic_edit_tools/web_crawl_tool_resolver.py +557 -0
  357. autocoder/common/v2/agent/agentic_edit_tools/web_search_tool_resolver.py +600 -0
  358. autocoder/common/v2/agent/agentic_edit_tools/write_to_file_tool_resolver.py +56 -121
  359. autocoder/common/v2/agent/agentic_edit_types.py +343 -9
  360. autocoder/common/v2/agent/runner/__init__.py +3 -3
  361. autocoder/common/v2/agent/runner/base_runner.py +12 -26
  362. autocoder/common/v2/agent/runner/{event_runner.py → file_based_event_runner.py} +3 -2
  363. autocoder/common/v2/agent/runner/sdk_runner.py +150 -8
  364. autocoder/common/v2/agent/runner/terminal_runner.py +170 -57
  365. autocoder/common/v2/agent/runner/tool_display.py +557 -159
  366. autocoder/common/v2/agent/test_agentic_callbacks.py +265 -0
  367. autocoder/common/v2/agent/test_agentic_edit.py +194 -0
  368. autocoder/common/v2/agent/tool_caller/__init__.py +24 -0
  369. autocoder/common/v2/agent/tool_caller/default_tool_resolver_map.py +135 -0
  370. autocoder/common/v2/agent/tool_caller/integration_test.py +172 -0
  371. autocoder/common/v2/agent/tool_caller/plugins/__init__.py +14 -0
  372. autocoder/common/v2/agent/tool_caller/plugins/base_plugin.py +126 -0
  373. autocoder/common/v2/agent/tool_caller/plugins/examples/__init__.py +13 -0
  374. autocoder/common/v2/agent/tool_caller/plugins/examples/logging_plugin.py +164 -0
  375. autocoder/common/v2/agent/tool_caller/plugins/examples/security_filter_plugin.py +198 -0
  376. autocoder/common/v2/agent/tool_caller/plugins/plugin_interface.py +141 -0
  377. autocoder/common/v2/agent/tool_caller/test_tool_caller.py +278 -0
  378. autocoder/common/v2/agent/tool_caller/tool_call_plugin_manager.py +331 -0
  379. autocoder/common/v2/agent/tool_caller/tool_caller.py +337 -0
  380. autocoder/common/v2/agent/tool_caller/usage_example.py +193 -0
  381. autocoder/common/v2/code_agentic_editblock_manager.py +4 -4
  382. autocoder/common/v2/code_auto_generate.py +136 -78
  383. autocoder/common/v2/code_auto_generate_diff.py +135 -79
  384. autocoder/common/v2/code_auto_generate_editblock.py +174 -99
  385. autocoder/common/v2/code_auto_generate_strict_diff.py +151 -71
  386. autocoder/common/v2/code_auto_merge.py +1 -1
  387. autocoder/common/v2/code_auto_merge_editblock.py +13 -1
  388. autocoder/common/v2/code_diff_manager.py +3 -3
  389. autocoder/common/v2/code_editblock_manager.py +4 -14
  390. autocoder/common/v2/code_manager.py +1 -1
  391. autocoder/common/v2/code_strict_diff_manager.py +2 -2
  392. autocoder/common/wrap_llm_hint/__init__.py +10 -0
  393. autocoder/common/wrap_llm_hint/test_wrap_llm_hint.py +1067 -0
  394. autocoder/common/wrap_llm_hint/utils.py +432 -0
  395. autocoder/common/wrap_llm_hint/wrap_llm_hint.py +323 -0
  396. autocoder/completer/__init__.py +8 -0
  397. autocoder/completer/command_completer_v2.py +1051 -0
  398. autocoder/default_project/__init__.py +501 -0
  399. autocoder/dispacher/__init__.py +4 -12
  400. autocoder/dispacher/actions/action.py +165 -7
  401. autocoder/dispacher/actions/plugins/action_regex_project.py +2 -2
  402. autocoder/index/entry.py +116 -124
  403. autocoder/{agent → index/filter}/agentic_filter.py +322 -333
  404. autocoder/index/filter/normal_filter.py +5 -11
  405. autocoder/index/filter/quick_filter.py +1 -1
  406. autocoder/index/index.py +36 -9
  407. autocoder/index/tests/__init__.py +1 -0
  408. autocoder/index/tests/run_tests.py +195 -0
  409. autocoder/index/tests/test_entry.py +303 -0
  410. autocoder/index/tests/test_index_manager.py +314 -0
  411. autocoder/index/tests/test_module_integration.py +300 -0
  412. autocoder/index/tests/test_symbols_utils.py +183 -0
  413. autocoder/inner/__init__.py +4 -0
  414. autocoder/inner/agentic.py +932 -0
  415. autocoder/inner/async_command_handler.py +992 -0
  416. autocoder/inner/conversation_command_handlers.py +623 -0
  417. autocoder/inner/merge_command_handler.py +213 -0
  418. autocoder/inner/queue_command_handler.py +684 -0
  419. autocoder/models.py +95 -266
  420. autocoder/plugins/git_helper_plugin.py +31 -29
  421. autocoder/plugins/token_helper_plugin.py +65 -46
  422. autocoder/pyproject/__init__.py +32 -29
  423. autocoder/rag/agentic_rag.py +215 -75
  424. autocoder/rag/cache/simple_cache.py +1 -2
  425. autocoder/rag/loaders/image_loader.py +1 -1
  426. autocoder/rag/long_context_rag.py +42 -26
  427. autocoder/rag/qa_conversation_strategy.py +1 -1
  428. autocoder/rag/terminal/__init__.py +17 -0
  429. autocoder/rag/terminal/args.py +581 -0
  430. autocoder/rag/terminal/bootstrap.py +61 -0
  431. autocoder/rag/terminal/command_handlers.py +653 -0
  432. autocoder/rag/terminal/formatters/__init__.py +20 -0
  433. autocoder/rag/terminal/formatters/base.py +70 -0
  434. autocoder/rag/terminal/formatters/json_format.py +66 -0
  435. autocoder/rag/terminal/formatters/stream_json.py +95 -0
  436. autocoder/rag/terminal/formatters/text.py +28 -0
  437. autocoder/rag/terminal/init.py +120 -0
  438. autocoder/rag/terminal/utils.py +106 -0
  439. autocoder/rag/test_agentic_rag.py +389 -0
  440. autocoder/rag/test_doc_filter.py +3 -3
  441. autocoder/rag/test_long_context_rag.py +1 -1
  442. autocoder/rag/test_token_limiter.py +517 -10
  443. autocoder/rag/token_counter.py +3 -0
  444. autocoder/rag/token_limiter.py +19 -15
  445. autocoder/rag/tools/__init__.py +26 -2
  446. autocoder/rag/tools/bochaai_example.py +343 -0
  447. autocoder/rag/tools/bochaai_sdk.py +541 -0
  448. autocoder/rag/tools/metaso_example.py +268 -0
  449. autocoder/rag/tools/metaso_sdk.py +417 -0
  450. autocoder/rag/tools/recall_tool.py +28 -7
  451. autocoder/rag/tools/run_integration_tests.py +204 -0
  452. autocoder/rag/tools/test_all_providers.py +318 -0
  453. autocoder/rag/tools/test_bochaai_integration.py +482 -0
  454. autocoder/rag/tools/test_final_integration.py +215 -0
  455. autocoder/rag/tools/test_metaso_integration.py +424 -0
  456. autocoder/rag/tools/test_metaso_real.py +171 -0
  457. autocoder/rag/tools/test_web_crawl_tool.py +639 -0
  458. autocoder/rag/tools/test_web_search_tool.py +509 -0
  459. autocoder/rag/tools/todo_read_tool.py +202 -0
  460. autocoder/rag/tools/todo_write_tool.py +412 -0
  461. autocoder/rag/tools/web_crawl_tool.py +634 -0
  462. autocoder/rag/tools/web_search_tool.py +558 -0
  463. autocoder/rag/tools/web_tools_example.py +119 -0
  464. autocoder/rag/types.py +16 -0
  465. autocoder/rag/variable_holder.py +4 -2
  466. autocoder/rags.py +86 -79
  467. autocoder/regexproject/__init__.py +23 -21
  468. autocoder/sdk/__init__.py +46 -190
  469. autocoder/sdk/api.py +370 -0
  470. autocoder/sdk/async_runner/__init__.py +26 -0
  471. autocoder/sdk/async_runner/async_executor.py +650 -0
  472. autocoder/sdk/async_runner/async_handler.py +356 -0
  473. autocoder/sdk/async_runner/markdown_processor.py +595 -0
  474. autocoder/sdk/async_runner/task_metadata.py +284 -0
  475. autocoder/sdk/async_runner/worktree_manager.py +438 -0
  476. autocoder/sdk/cli/__init__.py +2 -5
  477. autocoder/sdk/cli/formatters.py +28 -204
  478. autocoder/sdk/cli/handlers.py +77 -44
  479. autocoder/sdk/cli/main.py +154 -171
  480. autocoder/sdk/cli/options.py +95 -22
  481. autocoder/sdk/constants.py +139 -51
  482. autocoder/sdk/core/auto_coder_core.py +484 -109
  483. autocoder/sdk/core/bridge.py +297 -115
  484. autocoder/sdk/exceptions.py +18 -12
  485. autocoder/sdk/formatters/__init__.py +19 -0
  486. autocoder/sdk/formatters/input.py +64 -0
  487. autocoder/sdk/formatters/output.py +247 -0
  488. autocoder/sdk/formatters/stream.py +54 -0
  489. autocoder/sdk/models/__init__.py +6 -5
  490. autocoder/sdk/models/options.py +55 -18
  491. autocoder/sdk/utils/formatters.py +27 -195
  492. autocoder/suffixproject/__init__.py +28 -25
  493. autocoder/terminal/__init__.py +14 -0
  494. autocoder/terminal/app.py +454 -0
  495. autocoder/terminal/args.py +32 -0
  496. autocoder/terminal/bootstrap.py +178 -0
  497. autocoder/terminal/command_processor.py +521 -0
  498. autocoder/terminal/command_registry.py +57 -0
  499. autocoder/terminal/help.py +97 -0
  500. autocoder/terminal/tasks/__init__.py +5 -0
  501. autocoder/terminal/tasks/background.py +77 -0
  502. autocoder/terminal/tasks/task_event.py +70 -0
  503. autocoder/terminal/ui/__init__.py +13 -0
  504. autocoder/terminal/ui/completer.py +268 -0
  505. autocoder/terminal/ui/keybindings.py +75 -0
  506. autocoder/terminal/ui/session.py +41 -0
  507. autocoder/terminal/ui/toolbar.py +64 -0
  508. autocoder/terminal/utils/__init__.py +13 -0
  509. autocoder/terminal/utils/errors.py +18 -0
  510. autocoder/terminal/utils/paths.py +19 -0
  511. autocoder/terminal/utils/shell.py +43 -0
  512. autocoder/terminal_v3/__init__.py +10 -0
  513. autocoder/terminal_v3/app.py +201 -0
  514. autocoder/terminal_v3/handlers/__init__.py +5 -0
  515. autocoder/terminal_v3/handlers/command_handler.py +131 -0
  516. autocoder/terminal_v3/models/__init__.py +6 -0
  517. autocoder/terminal_v3/models/conversation_buffer.py +214 -0
  518. autocoder/terminal_v3/models/message.py +50 -0
  519. autocoder/terminal_v3/models/tool_display.py +247 -0
  520. autocoder/terminal_v3/ui/__init__.py +7 -0
  521. autocoder/terminal_v3/ui/keybindings.py +56 -0
  522. autocoder/terminal_v3/ui/layout.py +141 -0
  523. autocoder/terminal_v3/ui/styles.py +43 -0
  524. autocoder/tsproject/__init__.py +23 -23
  525. autocoder/utils/auto_coder_utils/chat_stream_out.py +1 -1
  526. autocoder/utils/llms.py +88 -80
  527. autocoder/utils/math_utils.py +101 -0
  528. autocoder/utils/model_provider_selector.py +16 -4
  529. autocoder/utils/operate_config_api.py +33 -5
  530. autocoder/utils/thread_utils.py +2 -2
  531. autocoder/version.py +4 -2
  532. autocoder/workflow_agents/__init__.py +84 -0
  533. autocoder/workflow_agents/agent.py +143 -0
  534. autocoder/workflow_agents/exceptions.py +573 -0
  535. autocoder/workflow_agents/executor.py +489 -0
  536. autocoder/workflow_agents/loader.py +737 -0
  537. autocoder/workflow_agents/runner.py +267 -0
  538. autocoder/workflow_agents/types.py +172 -0
  539. autocoder/workflow_agents/utils.py +434 -0
  540. autocoder/workflow_agents/workflow_manager.py +211 -0
  541. auto_coder-1.0.0.dist-info/METADATA +0 -396
  542. auto_coder-1.0.0.dist-info/RECORD +0 -442
  543. auto_coder-1.0.0.dist-info/licenses/LICENSE +0 -201
  544. autocoder/auto_coder_server.py +0 -672
  545. autocoder/benchmark.py +0 -138
  546. autocoder/common/ac_style_command_parser/example.py +0 -7
  547. autocoder/common/cleaner.py +0 -31
  548. autocoder/common/command_completer_v2.py +0 -615
  549. autocoder/common/context_pruner.py +0 -477
  550. autocoder/common/conversation_pruner.py +0 -132
  551. autocoder/common/directory_cache/__init__.py +0 -1
  552. autocoder/common/directory_cache/cache.py +0 -192
  553. autocoder/common/directory_cache/test_cache.py +0 -190
  554. autocoder/common/file_checkpoint/examples.py +0 -217
  555. autocoder/common/llm_friendly_package_example.py +0 -138
  556. autocoder/common/llm_friendly_package_test.py +0 -63
  557. autocoder/common/pull_requests/test_module.py +0 -1
  558. autocoder/common/rulefiles/autocoderrules_utils.py +0 -484
  559. autocoder/common/text.py +0 -30
  560. autocoder/common/v2/agent/agentic_edit_tools/list_package_info_tool_resolver.py +0 -42
  561. autocoder/common/v2/agent/agentic_edit_tools/test_execute_command_tool_resolver.py +0 -70
  562. autocoder/common/v2/agent/agentic_edit_tools/test_search_files_tool_resolver.py +0 -163
  563. autocoder/common/v2/agent/agentic_tool_display.py +0 -183
  564. autocoder/plugins/dynamic_completion_example.py +0 -148
  565. autocoder/plugins/sample_plugin.py +0 -160
  566. autocoder/sdk/cli/__main__.py +0 -26
  567. autocoder/sdk/cli/completion_wrapper.py +0 -38
  568. autocoder/sdk/cli/install_completion.py +0 -301
  569. autocoder/sdk/models/messages.py +0 -209
  570. autocoder/sdk/session/__init__.py +0 -32
  571. autocoder/sdk/session/session.py +0 -106
  572. autocoder/sdk/session/session_manager.py +0 -56
  573. {auto_coder-1.0.0.dist-info → auto_coder-2.0.0.dist-info}/top_level.txt +0 -0
  574. /autocoder/{sdk/example.py → common/agent_query_queue/__init__.py} +0 -0
@@ -1,147 +1,370 @@
1
- from typing import List, Dict, Any, Union
1
+ from typing import List, Dict, Any, Union, Optional
2
2
  import json
3
3
  import re
4
- from pydantic import BaseModel
4
+ import copy
5
5
  import byzerllm
6
6
  from autocoder.common.printer import Printer
7
- from autocoder.rag.token_counter import count_tokens
7
+ from autocoder.common.tokens import count_string_tokens
8
8
  from loguru import logger
9
9
  from autocoder.common import AutoCoderArgs
10
+ from autocoder.common.autocoderargs_parser import AutoCoderArgsParser
10
11
  from autocoder.common.save_formatted_log import save_formatted_log
12
+ from autocoder.common.wrap_llm_hint.utils import merge_with_last_user_message
13
+ from autocoder.common.conversations.get_conversation_manager import get_conversation_manager
14
+ from .tool_content_detector import ToolContentDetector
15
+ from .conversation_message_ids_api import get_conversation_message_ids_api
16
+ from .conversation_message_ids_pruner import ConversationMessageIdsPruner
11
17
 
12
- class AgenticPruneStrategy(BaseModel):
13
- name: str
14
- description: str
15
- config: Dict[str, Any] = {"safe_zone_tokens": 0}
16
18
 
17
19
  class AgenticConversationPruner:
18
20
  """
19
21
  Specialized conversation pruner for agentic conversations that cleans up tool outputs.
20
-
22
+
21
23
  This pruner specifically targets tool result messages (role='user', content contains '<tool_result>')
22
24
  and replaces their content with a placeholder message to reduce token usage while maintaining
23
25
  conversation flow.
24
26
  """
25
-
26
- def __init__(self, args: AutoCoderArgs, llm: Union[byzerllm.ByzerLLM, byzerllm.SimpleByzerLLM]):
27
+
28
+ def __init__(self, args: AutoCoderArgs, llm: Union[byzerllm.ByzerLLM, byzerllm.SimpleByzerLLM, None], conversation_id: Optional[str] = None):
29
+ if conversation_id is None:
30
+ raise ValueError("conversation_id is required in AgenticConversationPruner")
31
+
27
32
  self.args = args
28
33
  self.llm = llm
34
+ self.conversation_id = conversation_id
29
35
  self.printer = Printer()
30
36
  self.replacement_message = "This message has been cleared. If you still want to get this information, you can call the tool again to retrieve it."
31
-
32
- self.strategies = {
33
- "tool_output_cleanup": AgenticPruneStrategy(
34
- name="tool_output_cleanup",
35
- description="Clean up tool output results by replacing content with placeholder messages",
36
- config={"safe_zone_tokens": self.args.conversation_prune_safe_zone_tokens}
37
- )
37
+
38
+ # Initialize AutoCoderArgs parser for flexible parameter parsing
39
+ self.args_parser = AutoCoderArgsParser()
40
+
41
+ # Initialize tool content detector
42
+ self.tool_content_detector = ToolContentDetector(
43
+ replacement_message="Content cleared to save tokens"
44
+ )
45
+
46
+ # Initialize message IDs-based pruning components
47
+ self.message_ids_api = get_conversation_message_ids_api()
48
+ self.message_ids_pruner = ConversationMessageIdsPruner()
49
+
50
+ # Track pruning statistics
51
+ self.pruning_stats = {
52
+ "range_pruning_applied": False,
53
+ "range_pruning_success": False,
54
+ "original_length": 0,
55
+ "after_range_pruning": 0,
56
+ "after_tool_cleanup": 0,
57
+ "total_compression_ratio": 1.0
38
58
  }
39
59
 
40
- def get_available_strategies(self) -> List[Dict[str, Any]]:
41
- """Get all available pruning strategies"""
42
- return [strategy.model_dump() for strategy in self.strategies.values()]
60
+ def _get_current_conversation_id(self) -> str:
61
+ """
62
+ Get the current conversation ID from the constructor parameter.
43
63
 
44
- def prune_conversations(self, conversations: List[Dict[str, Any]],
45
- strategy_name: str = "tool_output_cleanup") -> List[Dict[str, Any]]:
64
+ Returns:
65
+ Current conversation ID (guaranteed to be not None)
46
66
  """
47
- Prune conversations by cleaning up tool outputs.
67
+ return self.conversation_id
68
+
69
+ def _get_parsed_safe_zone_tokens(self) -> int:
70
+ """
71
+ 解析 conversation_prune_safe_zone_tokens 参数,支持多种格式
72
+
73
+ Returns:
74
+ 解析后的 token 数量
75
+ """
76
+ # 添加调试信息
77
+ raw_value = self.args.conversation_prune_safe_zone_tokens
78
+ code_model = self.args.code_model or self.args.model
79
+
80
+ result = self.args_parser.parse_conversation_prune_safe_zone_tokens(
81
+ raw_value,
82
+ code_model
83
+ )
84
+
85
+ # 防护逻辑:如果结果为 0,使用默认值
86
+ if result == 0:
87
+ default_value = 50 * 1024
88
+ print(f"[WARNING] conversation_prune_safe_zone_tokens 为 0,使用默认值: {default_value}")
89
+ return default_value
48
90
 
91
+ return result
92
+
93
+ def prune_conversations(self, conversations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
94
+ """
95
+ Prune conversations by applying range-based pruning first, then cleaning up tool outputs and tool call content.
96
+
49
97
  Args:
50
98
  conversations: Original conversation list
51
- strategy_name: Strategy name
52
-
99
+
53
100
  Returns:
54
101
  Pruned conversation list
55
- """
56
- safe_zone_tokens = self.args.conversation_prune_safe_zone_tokens
57
- current_tokens = count_tokens(json.dumps(conversations, ensure_ascii=False))
58
-
102
+ """
103
+ safe_zone_tokens = self._get_parsed_safe_zone_tokens()
104
+ # print(f"safe_zone_tokens: {safe_zone_tokens}")
105
+
106
+ # 保存原始conversations的深拷贝,用于最终对比分析
107
+ # original_conversations = copy.deepcopy(conversations)
108
+ original_length = len(conversations)
109
+
110
+ # Initialize pruning statistics
111
+ self.pruning_stats["original_length"] = original_length
112
+
113
+ current_tokens = count_string_tokens(
114
+ json.dumps(conversations, ensure_ascii=False))
115
+
59
116
  if current_tokens <= safe_zone_tokens:
117
+ # Update stats for no pruning needed
118
+ self.pruning_stats.update({
119
+ "after_range_pruning": original_length,
120
+ "after_tool_cleanup": original_length,
121
+ "total_compression_ratio": 1.0
122
+ })
60
123
  return conversations
61
124
 
62
- strategy = self.strategies.get(strategy_name, self.strategies["tool_output_cleanup"])
63
-
64
- if strategy.name == "tool_output_cleanup":
65
- return self._tool_output_cleanup_prune(conversations, strategy.config)
66
- else:
67
- logger.warning(f"Unknown strategy: {strategy_name}, using tool_output_cleanup instead")
68
- return self._tool_output_cleanup_prune(conversations, strategy.config)
125
+ # Step 1: Apply message ids pruning if conversation_id is provided
126
+ processed_conversations = self._apply_message_ids_pruning(
127
+ conversations)
128
+ logger.info(
129
+ f"After Message IDs pruning: {len(conversations)} -> {len(processed_conversations)} messages")
130
+
131
+ # Check if we're within safe zone after range pruning
132
+ current_tokens = count_string_tokens(json.dumps(
133
+ processed_conversations, ensure_ascii=False))
134
+
135
+ # Step 2: Apply tool cleanup if still needed
136
+ if current_tokens > safe_zone_tokens:
137
+ config = {"safe_zone_tokens": safe_zone_tokens}
138
+ processed_conversations = self._unified_tool_cleanup_prune(
139
+ processed_conversations, config)
140
+
141
+ # Update final statistics
142
+ final_length = len(processed_conversations)
143
+ self.pruning_stats["after_tool_cleanup"] = final_length
144
+ self.pruning_stats["total_compression_ratio"] = final_length / \
145
+ original_length if original_length > 0 else 1.0
146
+
147
+ # Log overall pruning results
148
+ logger.info(f"Complete pruning: {original_length} -> {final_length} messages "
149
+ f"(total compression: {self.pruning_stats['total_compression_ratio']:.2%})")
150
+
151
+ # if the processed_conversations is still too long, we should add a user message to ask the LLM to clean up the conversation
152
+ final_tokens = count_string_tokens(json.dumps(
153
+ processed_conversations, ensure_ascii=False))
154
+ if final_tokens > safe_zone_tokens:
155
+ cleanup_message = "The conversation is still too long, please use conversation_message_ids_write tool to save the message ids to be deleted."
69
156
 
70
- def _tool_output_cleanup_prune(self, conversations: List[Dict[str, Any]],
71
- config: Dict[str, Any]) -> List[Dict[str, Any]]:
157
+ # Use standardized hint merging from wrap_llm_hint module
158
+ processed_conversations = merge_with_last_user_message(
159
+ processed_conversations, cleanup_message)
160
+ # 执行裁剪前后对比分析并记录到日志
161
+ # self._compare_and_log_conversations(original_conversations, processed_conversations)
162
+
163
+ save_formatted_log(self.args.source_dir, json.dumps(processed_conversations, ensure_ascii=False),
164
+ "agentic_pruned_conversation", conversation_id=self._get_current_conversation_id())
165
+
166
+ return processed_conversations
167
+
168
+ def _apply_message_ids_pruning(self, conversations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
72
169
  """
73
- Clean up tool outputs by replacing their content with placeholder messages.
74
-
170
+ Apply message IDs-based pruning if conversation_id is provided and message IDs configuration exists.
171
+
172
+ Args:
173
+ conversations: Original conversation list
174
+
175
+ Returns:
176
+ Conversations after message IDs pruning (or original if no message IDs config)
177
+ """
178
+ # Check if we have conversation_id and message IDs configuration
179
+ conversation_id = self._get_current_conversation_id()
180
+ if not conversation_id:
181
+ logger.debug(
182
+ "No conversation_id provided, skipping message IDs pruning")
183
+ self.pruning_stats["after_range_pruning"] = len(conversations)
184
+ return conversations
185
+
186
+ # Get message IDs configuration for this conversation
187
+ conversation_message_ids = self.message_ids_api.get_conversation_message_ids(
188
+ conversation_id)
189
+ if not conversation_message_ids:
190
+ logger.debug(
191
+ f"No message IDs configuration found for conversation {conversation_id}, skipping message IDs pruning")
192
+ self.pruning_stats["after_range_pruning"] = len(conversations)
193
+ return conversations
194
+
195
+ # Apply message IDs pruning
196
+ logger.info(
197
+ f"Applying message IDs pruning for conversation {conversation_id}")
198
+ self.pruning_stats["range_pruning_applied"] = True
199
+
200
+ try:
201
+ pruning_result = self.message_ids_pruner.prune_conversations(
202
+ conversations, conversation_message_ids)
203
+ logger.info(f"Message IDs: {pruning_result}")
204
+
205
+ if pruning_result.success:
206
+ self.pruning_stats["range_pruning_success"] = True
207
+ self.pruning_stats["after_range_pruning"] = pruning_result.pruned_length
208
+
209
+ # Log message IDs pruning results
210
+ logger.info(f"Message IDs pruning completed: {pruning_result.original_length} -> {pruning_result.pruned_length} messages "
211
+ f"(message IDs compression: {pruning_result.compression_ratio:.2%})")
212
+
213
+ # Log warnings if any
214
+ if pruning_result.warnings:
215
+ for warning in pruning_result.warnings:
216
+ logger.warning(f"Message IDs pruning: {warning}")
217
+
218
+ return pruning_result.pruned_conversations
219
+ else:
220
+ # Message IDs pruning failed, log error and continue with original conversations
221
+ logger.error(
222
+ f"Message IDs pruning failed: {pruning_result.error_message}")
223
+ self.pruning_stats["after_range_pruning"] = len(conversations)
224
+ return conversations
225
+
226
+ except Exception as e:
227
+ logger.error(
228
+ f"Message IDs pruning failed with exception: {str(e)}")
229
+ self.pruning_stats["after_range_pruning"] = len(conversations)
230
+ return conversations
231
+
232
+ def _unified_tool_cleanup_prune(self, conversations: List[Dict[str, Any]],
233
+ config: Dict[str, Any]) -> List[Dict[str, Any]]:
234
+ """
235
+ Clean up both tool output results and tool call content in a unified process.
236
+
75
237
  This method:
76
- 1. Identifies tool result messages (role='user' with '<tool_result' in content)
77
- 2. Starts from the first tool output and progressively cleans them
78
- 3. Stops when token count is within safe zone
238
+ 1. Identifies both tool result messages (role='user' with '<tool_result' in content)
239
+ and assistant messages containing tool calls with large content
240
+ 2. Processes all cleanable messages in order, prioritizing tool results first
241
+ 3. Stops when token count is within safe zone OR when less than 6 unpruned messages remain
79
242
  """
80
- safe_zone_tokens = config.get("safe_zone_tokens", 50 * 1024)
81
- processed_conversations = conversations.copy()
82
-
83
- # Find all tool result message indices
84
- tool_result_indices = []
243
+ safe_zone_tokens = config.get("safe_zone_tokens", 80 * 1024)
244
+ # 使用深拷贝避免修改原始数据
245
+ processed_conversations = copy.deepcopy(conversations)
246
+
247
+ # 预先计算初始 token 数量,避免在循环中引用未定义的变量
248
+ current_tokens = count_string_tokens(json.dumps(
249
+ processed_conversations, ensure_ascii=False))
250
+
251
+ # Find all cleanable message indices with their types
252
+ cleanable_messages = []
253
+
254
+ # Find both tool result messages and tool call messages in one loop
85
255
  for i, conv in enumerate(processed_conversations):
86
- if (conv.get("role") == "user" and
87
- isinstance(conv.get("content"), str) and
88
- self._is_tool_result_message(conv.get("content", ""))):
89
- tool_result_indices.append(i)
90
-
91
- logger.info(f"Found {len(tool_result_indices)} tool result messages to potentially clean")
92
-
93
- # Clean tool outputs one by one, starting from the first one
94
- for tool_index in tool_result_indices:
95
- current_tokens = count_tokens(json.dumps(processed_conversations, ensure_ascii=False))
96
-
256
+ content = conv.get("content", "")
257
+ role = conv.get("role")
258
+
259
+ if isinstance(content, str):
260
+ # Check for tool result messages (user role)
261
+ if (role == "user" and self._is_tool_result_message(content)):
262
+ cleanable_messages.append(
263
+ {"index": i, "type": "tool_result"})
264
+ # Check for assistant messages with tool calls
265
+ elif (role == "assistant" and self.tool_content_detector.is_tool_call_content(content)):
266
+ cleanable_messages.append(
267
+ {"index": i, "type": "tool_call"})
268
+
269
+ # Sort by index to process in order, but prioritize tool_result messages
270
+ cleanable_messages.sort(key=lambda x: (
271
+ x["index"], x["type"] != "tool_result"))
272
+
273
+ logger.info(f"Found {len([m for m in cleanable_messages if m['type'] == 'tool_result'])} tool result messages "
274
+ f"and {len([m for m in cleanable_messages if m['type'] == 'tool_call'])} tool call messages to potentially clean")
275
+
276
+ # Track cleaned messages
277
+ cleaned_count = 0
278
+
279
+ # Clean messages one by one
280
+ for i, message_info in enumerate(cleanable_messages):
281
+ # 更新当前 token 数量
282
+ current_tokens = count_string_tokens(json.dumps(
283
+ processed_conversations, ensure_ascii=False))
284
+
285
+ # 检查停止条件
286
+ # 1. Token数已经在安全区域内
97
287
  if current_tokens <= safe_zone_tokens:
98
- logger.info(f"Token count ({current_tokens}) is within safe zone ({safe_zone_tokens}), stopping cleanup")
288
+ logger.info(
289
+ f"Token count ({current_tokens}) is within safe zone ({safe_zone_tokens}), stopping cleanup")
99
290
  break
100
-
101
- # Extract tool name for a more specific replacement message
102
- tool_name = self._extract_tool_name(processed_conversations[tool_index]["content"])
103
- replacement_content = self._generate_replacement_message(tool_name)
104
-
105
- # Replace the content
106
- original_content = processed_conversations[tool_index]["content"]
107
- processed_conversations[tool_index]["content"] = replacement_content
108
-
109
- logger.info(f"Cleaned tool result at index {tool_index} (tool: {tool_name}), "
110
- f"reduced from {len(original_content)} to {len(replacement_content)} characters")
111
-
112
- final_tokens = count_tokens(json.dumps(processed_conversations, ensure_ascii=False))
113
- logger.info(f"Cleanup completed. Token count: {current_tokens} -> {final_tokens}")
114
291
 
115
- save_formatted_log(self.args.source_dir, json.dumps(conversations, ensure_ascii=False), "agentic_pruned_conversation")
116
-
292
+ # 2. 剩余未裁剪的对话少于6段
293
+ remaining_unpruned = len(
294
+ cleanable_messages) - (i + 1) # i+1 因为i是从0开始的索引
295
+ if remaining_unpruned < 6:
296
+ logger.info(
297
+ f"Less than 6 unpruned messages remaining ({remaining_unpruned}), stopping cleanup")
298
+ break
299
+
300
+ msg_index = message_info["index"]
301
+ msg_type = message_info["type"]
302
+ original_content = processed_conversations[msg_index]["content"]
303
+
304
+ if msg_type == "tool_result":
305
+ # Handle tool result cleanup
306
+ tool_name = self._extract_tool_name(original_content)
307
+ replacement_content = self._generate_replacement_message(
308
+ tool_name)
309
+ processed_conversations[msg_index]["content"] = replacement_content
310
+ cleaned_count += 1
311
+
312
+ logger.info(f"Cleaned tool result at index {msg_index} (tool: {tool_name}), "
313
+ f"reduced from {len(original_content)} to {len(replacement_content)} characters")
314
+
315
+ elif msg_type == "tool_call":
316
+ # Handle tool call content cleanup
317
+ tool_info = self.tool_content_detector.detect_tool_call(
318
+ original_content)
319
+
320
+ if tool_info:
321
+ new_content, replaced = self.tool_content_detector.replace_tool_content(
322
+ original_content, max_content_length=500
323
+ )
324
+
325
+ if replaced:
326
+ processed_conversations[msg_index]["content"] = new_content
327
+ cleaned_count += 1
328
+ logger.info(f"Cleaned tool call content at index {msg_index} (tool: {tool_info['tool_name']}), "
329
+ f"reduced from {len(original_content)} to {len(new_content)} characters")
330
+
331
+ final_tokens = count_string_tokens(json.dumps(
332
+ processed_conversations, ensure_ascii=False))
333
+ initial_tokens = count_string_tokens(
334
+ json.dumps(conversations, ensure_ascii=False))
335
+ logger.info(
336
+ f"Unified tool cleanup completed. Cleaned {cleaned_count} messages. Token count: {initial_tokens} -> {final_tokens}")
337
+
117
338
  return processed_conversations
118
339
 
119
340
  def _is_tool_result_message(self, content: str) -> bool:
120
341
  """
121
342
  Check if a message content contains tool result XML.
122
-
343
+
123
344
  Args:
124
345
  content: Message content to check
125
-
346
+
126
347
  Returns:
127
348
  True if content contains tool result format
128
349
  """
350
+ if content is None:
351
+ return False
129
352
  return "<tool_result" in content and "tool_name=" in content
130
353
 
131
354
  def _extract_tool_name(self, content: str) -> str:
132
355
  """
133
356
  Extract tool name from tool result XML content.
134
-
357
+
135
358
  Args:
136
359
  content: Tool result XML content
137
-
360
+
138
361
  Returns:
139
362
  Tool name or 'unknown' if not found
140
363
  """
141
364
  # Pattern to match: <tool_result tool_name='...' or <tool_result tool_name="..."
142
- pattern = r"<tool_result[^>]*tool_name=['\"]([^'\"]+)['\"]"
365
+ pattern = r"<tool_result[^>]*tool_name=['\"]([^'\"]*)['\"]"
143
366
  match = re.search(pattern, content)
144
-
367
+
145
368
  if match:
146
369
  return match.group(1)
147
370
  return "unknown"
@@ -149,49 +372,375 @@ class AgenticConversationPruner:
149
372
  def _generate_replacement_message(self, tool_name: str) -> str:
150
373
  """
151
374
  Generate a replacement message for a cleaned tool result.
152
-
375
+
153
376
  Args:
154
377
  tool_name: Name of the tool that was called
155
-
378
+
156
379
  Returns:
157
380
  Replacement message string
158
381
  """
159
382
  if tool_name and tool_name != "unknown":
160
383
  return (f"<tool_result tool_name='{tool_name}' success='true'>"
161
- f"<message>Content cleared to save tokens</message>"
162
- f"<content>{self.replacement_message}</content>"
163
- f"</tool_result>")
384
+ f"<message>Content cleared to save tokens</message>"
385
+ f"<content>{self.replacement_message}</content>"
386
+ f"</tool_result>")
164
387
  else:
165
388
  return f"<tool_result success='true'><message>[Content cleared to save tokens, you can call the tool again to get the tool result.]</message><content>{self.replacement_message}</content></tool_result>"
166
389
 
167
- def get_cleanup_statistics(self, original_conversations: List[Dict[str, Any]],
168
- pruned_conversations: List[Dict[str, Any]]) -> Dict[str, Any]:
390
+ def get_cleanup_statistics(self, original_conversations: List[Dict[str, Any]],
391
+ pruned_conversations: List[Dict[str, Any]]) -> Dict[str, Any]:
169
392
  """
170
393
  Get statistics about the cleanup process.
171
-
394
+
172
395
  Args:
173
396
  original_conversations: Original conversation list
174
397
  pruned_conversations: Pruned conversation list
175
-
398
+
176
399
  Returns:
177
400
  Dictionary with cleanup statistics
178
401
  """
179
- original_tokens = count_tokens(json.dumps(original_conversations, ensure_ascii=False))
180
- pruned_tokens = count_tokens(json.dumps(pruned_conversations, ensure_ascii=False))
181
-
402
+ original_tokens = count_string_tokens(
403
+ json.dumps(original_conversations, ensure_ascii=False))
404
+ pruned_tokens = count_string_tokens(
405
+ json.dumps(pruned_conversations, ensure_ascii=False))
406
+
182
407
  # Count cleaned tool results
183
- cleaned_count = 0
408
+ tool_results_cleaned = 0
409
+ tool_calls_cleaned = 0
410
+
184
411
  for orig, pruned in zip(original_conversations, pruned_conversations):
185
- if (orig.get("role") == "user" and
186
- self._is_tool_result_message(orig.get("content", "")) and
187
- orig.get("content") != pruned.get("content")):
188
- cleaned_count += 1
189
-
412
+ if orig.get("content") != pruned.get("content"):
413
+ # Check if it's a tool result message (user role)
414
+ if (orig.get("role") == "user" and
415
+ self._is_tool_result_message(orig.get("content", ""))):
416
+ tool_results_cleaned += 1
417
+
418
+ # Check if it's a tool call message (assistant role)
419
+ elif (orig.get("role") == "assistant" and
420
+ self.tool_content_detector.is_tool_call_content(orig.get("content", ""))):
421
+ tool_calls_cleaned += 1
422
+
190
423
  return {
191
424
  "original_tokens": original_tokens,
192
425
  "pruned_tokens": pruned_tokens,
193
426
  "tokens_saved": original_tokens - pruned_tokens,
194
427
  "compression_ratio": pruned_tokens / original_tokens if original_tokens > 0 else 1.0,
195
- "tool_results_cleaned": cleaned_count,
428
+ "tool_results_cleaned": tool_results_cleaned,
429
+ "tool_calls_cleaned": tool_calls_cleaned,
196
430
  "total_messages": len(original_conversations)
197
- }
431
+ }
432
+
433
+ def get_pruning_statistics(self) -> Dict[str, Any]:
434
+ """
435
+ Get comprehensive pruning statistics including both range and tool cleanup.
436
+
437
+ Returns:
438
+ Dictionary with complete pruning statistics
439
+ """
440
+ return {
441
+ "range_pruning": {
442
+ "applied": self.pruning_stats["range_pruning_applied"],
443
+ "success": self.pruning_stats["range_pruning_success"],
444
+ "conversation_id": self._get_current_conversation_id()
445
+ },
446
+ "message_counts": {
447
+ "original": self.pruning_stats["original_length"],
448
+ "after_range_pruning": self.pruning_stats["after_range_pruning"],
449
+ "after_tool_cleanup": self.pruning_stats["after_tool_cleanup"]
450
+ },
451
+ "compression": {
452
+ "range_pruning_ratio": (
453
+ self.pruning_stats["after_range_pruning"] /
454
+ self.pruning_stats["original_length"]
455
+ if self.pruning_stats["original_length"] > 0 else 1.0
456
+ ),
457
+ "tool_cleanup_ratio": (
458
+ self.pruning_stats["after_tool_cleanup"] /
459
+ self.pruning_stats["after_range_pruning"]
460
+ if self.pruning_stats["after_range_pruning"] > 0 else 1.0
461
+ ),
462
+ "total_compression_ratio": self.pruning_stats["total_compression_ratio"]
463
+ },
464
+ "messages_removed": {
465
+ "by_range_pruning": (
466
+ self.pruning_stats["original_length"] -
467
+ self.pruning_stats["after_range_pruning"]
468
+ ),
469
+ "by_tool_cleanup": (
470
+ self.pruning_stats["after_range_pruning"] -
471
+ self.pruning_stats["after_tool_cleanup"]
472
+ ),
473
+ "total_removed": (
474
+ self.pruning_stats["original_length"] -
475
+ self.pruning_stats["after_tool_cleanup"]
476
+ )
477
+ }
478
+ }
479
+
480
+ def _compare_and_log_conversations(self, original_conversations: List[Dict[str, Any]],
481
+ pruned_conversations: List[Dict[str, Any]]) -> None:
482
+ """
483
+ 独立的方法:对比裁剪前后的conversations,生成详细的对比报告并记录到日志中。
484
+
485
+ Args:
486
+ original_conversations: 裁剪前的对话列表
487
+ pruned_conversations: 裁剪后的对话列表
488
+ """
489
+ try:
490
+ # 生成对比分析报告
491
+ comparison_report = self._generate_comparison_report(
492
+ original_conversations, pruned_conversations)
493
+
494
+ # 记录详细的对比日志
495
+ logger.info("=== 对话裁剪前后对比分析 ===")
496
+ logger.info(
497
+ f"原始对话数量: {comparison_report['message_counts']['original']}")
498
+ logger.info(
499
+ f"裁剪后对话数量: {comparison_report['message_counts']['final']}")
500
+ logger.info(
501
+ f"删除的对话数量: {comparison_report['message_counts']['removed']}")
502
+ logger.info(
503
+ f"消息压缩比: {comparison_report['compression']['message_compression_ratio']:.2%}")
504
+ logger.info(
505
+ f"Token压缩比: {comparison_report['compression']['token_compression_ratio']:.2%}")
506
+ logger.info(f"Token节省数量: {comparison_report['tokens']['saved']:,}")
507
+
508
+ if comparison_report['changes']['tool_results_modified'] > 0:
509
+ logger.info(
510
+ f"工具结果消息清理数量: {comparison_report['changes']['tool_results_modified']}")
511
+
512
+ if comparison_report['changes']['tool_calls_modified'] > 0:
513
+ logger.info(
514
+ f"工具调用内容清理数量: {comparison_report['changes']['tool_calls_modified']}")
515
+
516
+ if comparison_report['changes']['messages_removed_by_ids'] > 0:
517
+ logger.info(
518
+ f"基于消息ID删除的消息数量: {comparison_report['changes']['messages_removed_by_ids']}")
519
+
520
+ # 保存详细的对比报告到文件日志
521
+ save_formatted_log(
522
+ self.args.source_dir,
523
+ json.dumps(comparison_report, ensure_ascii=False, indent=2),
524
+ "conversation_comparison_report",
525
+ conversation_id=self._get_current_conversation_id()
526
+ )
527
+
528
+ except Exception as e:
529
+ logger.error(f"生成对话对比报告时出错: {str(e)}")
530
+ logger.exception(e)
531
+
532
+ def _generate_comparison_report(self, original_conversations: List[Dict[str, Any]],
533
+ pruned_conversations: List[Dict[str, Any]]) -> Dict[str, Any]:
534
+ """
535
+ 生成详细的对比分析报告。
536
+
537
+ Args:
538
+ original_conversations: 裁剪前的对话列表
539
+ pruned_conversations: 裁剪后的对话列表
540
+
541
+ Returns:
542
+ 包含详细对比信息的字典
543
+ """
544
+ # 基础统计信息
545
+ original_count = len(original_conversations)
546
+ pruned_count = len(pruned_conversations)
547
+ removed_count = original_count - pruned_count
548
+
549
+ # Token统计
550
+ original_tokens = count_string_tokens(
551
+ json.dumps(original_conversations, ensure_ascii=False))
552
+ pruned_tokens = count_string_tokens(
553
+ json.dumps(pruned_conversations, ensure_ascii=False))
554
+ tokens_saved = original_tokens - pruned_tokens
555
+
556
+ # 分析变化详情
557
+ changes_analysis = self._analyze_conversation_changes(
558
+ original_conversations, pruned_conversations)
559
+
560
+ # 分析消息类型分布
561
+ original_distribution = self._analyze_message_distribution(
562
+ original_conversations)
563
+ pruned_distribution = self._analyze_message_distribution(
564
+ pruned_conversations)
565
+
566
+ # 生成完整的对比报告
567
+ report = {
568
+ "timestamp": str(__import__("datetime").datetime.now()),
569
+ "conversation_id": self._get_current_conversation_id(),
570
+ "pruning_strategy": {
571
+ "range_pruning_applied": self.pruning_stats["range_pruning_applied"],
572
+ "tool_cleanup_applied": True,
573
+ "safe_zone_tokens": self._get_parsed_safe_zone_tokens()
574
+ },
575
+ "message_counts": {
576
+ "original": original_count,
577
+ "final": pruned_count,
578
+ "removed": removed_count,
579
+ "after_range_pruning": self.pruning_stats.get("after_range_pruning", original_count)
580
+ },
581
+ "tokens": {
582
+ "original": original_tokens,
583
+ "final": pruned_tokens,
584
+ "saved": tokens_saved,
585
+ "safe_zone_limit": self._get_parsed_safe_zone_tokens()
586
+ },
587
+ "compression": {
588
+ "message_compression_ratio": pruned_count / original_count if original_count > 0 else 1.0,
589
+ "token_compression_ratio": pruned_tokens / original_tokens if original_tokens > 0 else 1.0,
590
+ "range_pruning_compression": (
591
+ self.pruning_stats.get(
592
+ "after_range_pruning", original_count) / original_count
593
+ if original_count > 0 else 1.0
594
+ ),
595
+ "tool_cleanup_compression": (
596
+ pruned_count /
597
+ self.pruning_stats.get(
598
+ "after_range_pruning", original_count)
599
+ if self.pruning_stats.get("after_range_pruning", original_count) > 0 else 1.0
600
+ )
601
+ },
602
+ "changes": {
603
+ "messages_removed_by_ids": (
604
+ original_count -
605
+ self.pruning_stats.get(
606
+ "after_range_pruning", original_count)
607
+ ),
608
+ "tool_results_modified": changes_analysis["tool_results_modified"],
609
+ "tool_calls_modified": changes_analysis["tool_calls_modified"],
610
+ "content_modifications": changes_analysis["content_modifications"],
611
+ "unchanged_messages": changes_analysis["unchanged_messages"]
612
+ },
613
+ "message_distribution": {
614
+ "original": original_distribution,
615
+ "pruned": pruned_distribution
616
+ },
617
+ "detailed_changes": changes_analysis["detailed_changes"],
618
+ "pruning_effectiveness": {
619
+ "tokens_per_message_before": original_tokens / original_count if original_count > 0 else 0,
620
+ "tokens_per_message_after": pruned_tokens / pruned_count if pruned_count > 0 else 0,
621
+ "average_token_reduction_per_message": tokens_saved / original_count if original_count > 0 else 0,
622
+ "within_safe_zone": pruned_tokens <= self._get_parsed_safe_zone_tokens()
623
+ }
624
+ }
625
+
626
+ return report
627
+
628
+ def _analyze_conversation_changes(self, original_conversations: List[Dict[str, Any]],
629
+ pruned_conversations: List[Dict[str, Any]]) -> Dict[str, Any]:
630
+ """
631
+ 分析对话变化的详细信息。
632
+
633
+ Args:
634
+ original_conversations: 原始对话列表
635
+ pruned_conversations: 裁剪后对话列表
636
+
637
+ Returns:
638
+ 包含变化分析的字典
639
+ """
640
+ tool_results_modified = 0
641
+ tool_calls_modified = 0
642
+ content_modifications = 0
643
+ unchanged_messages = 0
644
+ detailed_changes = []
645
+
646
+ # 创建一个映射来匹配原始和裁剪后的消息
647
+ min_length = min(len(original_conversations),
648
+ len(pruned_conversations))
649
+
650
+ for i in range(min_length):
651
+ original_msg = original_conversations[i]
652
+ pruned_msg = pruned_conversations[i]
653
+
654
+ original_content = original_msg.get("content", "")
655
+ pruned_content = pruned_msg.get("content", "")
656
+
657
+ if original_content != pruned_content:
658
+ content_modifications += 1
659
+
660
+ # 分析修改类型
661
+ change_type = "content_modified"
662
+ tool_name = None
663
+
664
+ if (original_msg.get("role") == "user" and
665
+ self._is_tool_result_message(original_content)):
666
+ tool_results_modified += 1
667
+ change_type = "tool_result_cleaned"
668
+ tool_name = self._extract_tool_name(original_content)
669
+
670
+ elif (original_msg.get("role") == "assistant" and
671
+ self.tool_content_detector.is_tool_call_content(original_content)):
672
+ tool_calls_modified += 1
673
+ change_type = "tool_call_cleaned"
674
+ tool_info = self.tool_content_detector.detect_tool_call(
675
+ original_content)
676
+ tool_name = tool_info.get(
677
+ "tool_name") if tool_info else "unknown"
678
+
679
+ detailed_changes.append({
680
+ "message_index": i,
681
+ "role": original_msg.get("role"),
682
+ "change_type": change_type,
683
+ "tool_name": tool_name,
684
+ "original_length": len(original_content),
685
+ "pruned_length": len(pruned_content),
686
+ "size_reduction": len(original_content) - len(pruned_content)
687
+ })
688
+ else:
689
+ unchanged_messages += 1
690
+
691
+ return {
692
+ "tool_results_modified": tool_results_modified,
693
+ "tool_calls_modified": tool_calls_modified,
694
+ "content_modifications": content_modifications,
695
+ "unchanged_messages": unchanged_messages,
696
+ "detailed_changes": detailed_changes
697
+ }
698
+
699
+ def _analyze_message_distribution(self, conversations: List[Dict[str, Any]]) -> Dict[str, Any]:
700
+ """
701
+ 分析消息的角色分布和类型分布。
702
+
703
+ Args:
704
+ conversations: 对话列表
705
+
706
+ Returns:
707
+ 包含分布信息的字典
708
+ """
709
+ role_counts = {"user": 0, "assistant": 0, "system": 0, "other": 0}
710
+ message_types = {
711
+ "tool_result": 0,
712
+ "tool_call": 0,
713
+ "regular_user": 0,
714
+ "regular_assistant": 0,
715
+ "system": 0
716
+ }
717
+
718
+ for msg in conversations:
719
+ role = msg.get("role", "other")
720
+ content = msg.get("content", "")
721
+
722
+ # 统计角色分布
723
+ if role in role_counts:
724
+ role_counts[role] += 1
725
+ else:
726
+ role_counts["other"] += 1
727
+
728
+ # 统计消息类型分布
729
+ if role == "system":
730
+ message_types["system"] += 1
731
+ elif role == "user":
732
+ if self._is_tool_result_message(content):
733
+ message_types["tool_result"] += 1
734
+ else:
735
+ message_types["regular_user"] += 1
736
+ elif role == "assistant":
737
+ if self.tool_content_detector.is_tool_call_content(content):
738
+ message_types["tool_call"] += 1
739
+ else:
740
+ message_types["regular_assistant"] += 1
741
+
742
+ return {
743
+ "total_messages": len(conversations),
744
+ "role_distribution": role_counts,
745
+ "message_type_distribution": message_types
746
+ }