RubyGems - rails_console_ai - Versions diffs - 0.22.0 → 0.23.0 - Mend

rails_console_ai 0.22.0 → 0.23.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (17)

checksums.yaml +4 -4
data/CHANGELOG.md +22 -0
data/lib/generators/rails_console_ai/templates/initializer.rb +8 -4
data/lib/rails_console_ai/channel/slack.rb +2 -1
data/lib/rails_console_ai/configuration.rb +23 -0
data/lib/rails_console_ai/context_builder.rb +14 -7
data/lib/rails_console_ai/conversation_engine.rb +327 -104
data/lib/rails_console_ai/executor.rb +19 -10
data/lib/rails_console_ai/providers/bedrock.rb +2 -0
data/lib/rails_console_ai/repl.rb +2 -2
data/lib/rails_console_ai/skill_loader.rb +49 -0
data/lib/rails_console_ai/slack_bot.rb +20 -16
data/lib/rails_console_ai/tools/memory_tools.rb +22 -5
data/lib/rails_console_ai/tools/model_tools.rb +28 -0
data/lib/rails_console_ai/tools/registry.rb +61 -3
data/lib/rails_console_ai/version.rb +1 -1
metadata +1 -1

data/lib/rails_console_ai/conversation_engine.rb CHANGED

@@ -3,9 +3,10 @@ module RailsConsoleAi
     attr_reader :history, :total_input_tokens, :total_output_tokens,
                 :interactive_session_id, :session_name
-    RECENT_OUTPUTS_TO_KEEP = 2
     LARGE_OUTPUT_THRESHOLD = 10_000      # chars — truncate tool results larger than this immediately
     LARGE_OUTPUT_PREVIEW_CHARS = 8_000   # chars — how much of the output the LLM sees upfront
+    LOOP_WARN_THRESHOLD = 3              # same tool+args repeated → inject warning
+    LOOP_BREAK_THRESHOLD = 5             # same tool+args repeated → break loop
     def initialize(binding_context:, channel:, slack_thread_ts: nil, slack_channel_name: nil)
       @binding_context = binding_context
@@ -30,6 +31,7 @@ module RailsConsoleAi
       @last_interactive_executed = false
       @compact_warned = false
       @prior_duration_ms = 0
+      @expanded_output_ids = Set.new
     end
     # --- Public API for channels ---
@@ -47,7 +49,7 @@ module RailsConsoleAi
           conversation << { role: :assistant, content: @_last_result_text }
           conversation << { role: :user, content: error_msg }
-          @channel.display_dim("  Attempting to fix...")
+          @channel.display_dim("  Ran into an issue, trying a different approach...")
           exec_result, code, executed = one_shot_round(conversation)
         end
@@ -112,7 +114,7 @@ module RailsConsoleAi
       status = send_and_execute
       if status == :error
-        @channel.display_dim("  Attempting to fix...")
+        @channel.display_dim("  Ran into an issue, trying a different approach...")
         send_and_execute
       end
     end
@@ -235,7 +237,7 @@ module RailsConsoleAi
     end
     def execute_direct(raw_code)
-      exec_result = @executor.execute(raw_code)
+      exec_result = @executor.execute_unsafe(raw_code)
       output_parts = []
       output_parts << "Output:\n#{@executor.last_output.strip}" if @executor.last_output && !@executor.last_output.strip.empty?
@@ -244,12 +246,11 @@ module RailsConsoleAi
       result_str = output_parts.join("\n\n")
       context_msg = "User directly executed code: `#{raw_code}`"
+      output_id = @executor.store_output(result_str)
       if result_str.length > LARGE_OUTPUT_THRESHOLD
-        output_id = @executor.store_output(result_str)
         preview = result_str[0, LARGE_OUTPUT_PREVIEW_CHARS]
         context_msg += "\n#{preview}\n\n[Output truncated at #{LARGE_OUTPUT_PREVIEW_CHARS} of #{result_str.length} chars — use recall_output tool with id #{output_id} to retrieve the full output]"
       elsif !output_parts.empty?
-        output_id = @executor.store_output(result_str)
         context_msg += "\n#{result_str}"
       end
       @history << { role: :user, content: context_msg, output_id: output_id }
@@ -263,7 +264,7 @@ module RailsConsoleAi
     def send_and_execute
       begin
-        result, tool_messages = send_query(nil, conversation: @history)
+        result, tool_messages, last_llm_stats = send_query(nil, conversation: @history)
       rescue Providers::ProviderError => e
         if e.message.include?("prompt is too long") && @history.length >= 6
           @channel.display_warning("  Context limit reached. Run /compact to reduce context size, then try again.")
@@ -284,7 +285,15 @@ module RailsConsoleAi
       log_interactive_turn
       @history.concat(tool_messages) if tool_messages && !tool_messages.empty?
-      @history << { role: :assistant, content: result.text }
+      # Only add the final assistant text when the LLM gave a final response (end_turn).
+      # For tool_use results, the assistant message is already in tool_messages via
+      # format_assistant_message, so adding result.text again would duplicate it —
+      # and if the text is empty, Bedrock rejects the empty content array.
+      unless result.tool_use?
+        entry = { role: :assistant, content: result.text }
+        entry[:llm_stats] = last_llm_stats if last_llm_stats
+        @history << entry
+      end
       return :no_code unless code && !code.strip.empty?
       return :cancelled if @channel.cancelled?
@@ -413,15 +422,23 @@ module RailsConsoleAi
         return
       end
-      trimmed = trim_old_outputs(@history)
-      stdout.puts "\e[36m  Conversation (#{trimmed.length} messages, as sent to LLM):\e[0m"
-      trimmed.each_with_index do |msg, i|
-        role = msg[:role].to_s
-        content = msg[:content].to_s
-        label = role == 'user' ? "\e[33m[user]\e[0m" : "\e[36m[assistant]\e[0m"
-        stdout.puts "#{label} #{content}"
-        stdout.puts if i < trimmed.length - 1
-      end
+      messages = trim_large_outputs(@history)
+      system_prompt = context
+      require 'rails_console_ai/tools/registry'
+      tools = Tools::Registry.new(executor: @executor, channel: @channel) rescue nil
+      opts = { io: stdout, prefix: "  ", d: "\e[2m", r: "\e[0m" }
+      conversation_summary(messages, system_prompt, tools, **opts)
+      conversation_messages(messages, **opts)
+    end
+    def display_conversation_to(io)
+      messages = trim_large_outputs(@history)
+      system_prompt = context
+      require 'rails_console_ai/tools/registry'
+      tools = Tools::Registry.new(executor: @executor, channel: @channel) rescue nil
+      opts = { io: io, prefix: "", d: "", r: "" }
+      conversation_summary(messages, system_prompt, tools, **opts)
+      conversation_messages(messages, **opts)
     end
     def context
@@ -710,8 +727,8 @@ module RailsConsoleAi
         EXPLORATION STRATEGY — be efficient to avoid timeouts:
         1. Start with list_models to see all models and their associations
-        2. Pick the 5-8 CORE models and call describe_model on those only
-        3. Call describe_table on only 3-5 key tables (skip tables whose models already told you enough)
+        2. Pick the 5-8 CORE models and call describe_model on those only (it includes columns, indexes, associations, validations)
+        3. Call describe_table only for tables that have NO corresponding model (join tables, legacy tables, etc.)
         4. Use search_code sparingly — only for specific patterns you suspect (sharding, STI, concerns)
         5. Use read_file only when you need to understand a specific pattern (read small sections, not whole files)
         6. Do NOT exhaustively describe every table or model — focus on what's important
@@ -751,7 +768,7 @@ module RailsConsoleAi
                    [{ role: :user, content: query }]
                  end
-      messages = trim_old_outputs(messages) if conversation
+      messages = trim_large_outputs(messages) if conversation
       send_query_with_tools(messages)
     end
@@ -769,6 +786,7 @@ module RailsConsoleAi
       last_tool_names = []
       exhausted = false
+      tool_call_counts = Hash.new(0)
       max_rounds.times do |round|
         if @channel.cancelled?
@@ -787,9 +805,9 @@ module RailsConsoleAi
           @channel.display_dim("  #{llm_status(round, messages, total_input, last_thinking, last_tool_names)}")
         end
-        # Trim old tool outputs between rounds to prevent context explosion.
+        # Trim large tool outputs between rounds to prevent context explosion.
         # The LLM can still retrieve omitted outputs via recall_output.
-        messages = trim_old_outputs(messages) if round > 0
+        messages = trim_large_outputs(messages) if round > 0
         if RailsConsoleAi.configuration.debug
           debug_pre_call(round, messages, active_system_prompt, tools, total_input, total_output)
@@ -816,6 +834,7 @@ module RailsConsoleAi
         last_thinking = (result.text && !result.text.strip.empty?) ? result.text.strip : nil
         assistant_msg = provider.format_assistant_message(result)
+        assistant_msg[:llm_stats] = format_llm_stats(result)
         messages << assistant_msg
         new_messages << assistant_msg
@@ -844,12 +863,13 @@ module RailsConsoleAi
             next
           end
+          # Display any pending LLM text before executing the tool
+          if last_thinking
+            last_thinking.split("\n").each { |line| @channel.display_dim("  #{line}") }
+            last_thinking = nil
+          end
           if tc[:name] == 'ask_user' || tc[:name] == 'execute_plan'
-            # Display any pending LLM text before prompting the user
-            if last_thinking
-              last_thinking.split("\n").each { |line| @channel.display_dim("  #{line}") }
-              last_thinking = nil
-            end
             tool_result = tools.execute(tc[:name], tc[:arguments])
           else
             args_display = format_tool_args(tc[:name], tc[:arguments])
@@ -868,20 +888,35 @@ module RailsConsoleAi
           tool_msg = provider.format_tool_result(tc[:id], tool_result)
           full_text = tool_result.to_s
+          output_id = @executor.store_output(full_text)
+          tool_msg[:output_id] = output_id
           if full_text.length > LARGE_OUTPUT_THRESHOLD
-            output_id = @executor.store_output(full_text)
-            tool_msg[:output_id] = output_id
             truncated = full_text[0, LARGE_OUTPUT_PREVIEW_CHARS]
             truncated += "\n\n[Output truncated at #{LARGE_OUTPUT_PREVIEW_CHARS} of #{full_text.length} chars — use recall_output tool with id #{output_id} to retrieve the full output]"
             tool_msg = provider.format_tool_result(tc[:id], truncated)
             tool_msg[:output_id] = output_id
-          elsif full_text.length > 200
-            tool_msg[:output_id] = @executor.store_output(full_text)
           end
+          tool_msg[:do_not_trim] = true if %w[recall_memory recall_memories activate_skill
+                                                describe_model describe_table list_models list_tables].include?(tc[:name])
           messages << tool_msg
           new_messages << tool_msg
         end
+        # Loop detection: track repeated identical tool calls
+        result.tool_calls.each do |tc|
+          key = "#{tc[:name]}:#{tc[:arguments].to_json}"
+          tool_call_counts[key] += 1
+          if tool_call_counts[key] >= LOOP_BREAK_THRESHOLD
+            @channel.display_dim("  Loop detected: #{tc[:name]} called #{tool_call_counts[key]} times with same args — stopping.")
+            exhausted = true
+          elsif tool_call_counts[key] >= LOOP_WARN_THRESHOLD
+            @channel.display_dim("  Warning: #{tc[:name]} called #{tool_call_counts[key]} times with same args — consider a different approach.")
+            messages << { role: :user, content: "You are repeating the same tool call (#{tc[:name]}) with the same arguments. This is not making progress. Try a different approach or provide your answer now." }
+          end
+        end
+        break if exhausted
         # If the user declined execution, don't call the LLM again —
         # just return to the prompt so they can correct their request.
         break if @executor.last_cancelled?
@@ -889,10 +924,6 @@ module RailsConsoleAi
         exhausted = true if round == max_rounds - 1
       end
-      # Re-truncate any outputs that were expanded for the LLM — the LLM has
-      # seen them and responded, so collapse back to save context on future calls.
-      re_truncate_expanded(messages)
       if exhausted
         $stdout.puts "\e[33m  Hit tool round limit (#{max_rounds}). Forcing final answer. Increase with: RailsConsoleAi.configure { |c| c.max_tool_rounds = 200 }\e[0m"
         messages << { role: :user, content: "You've used all available tool rounds. Please provide your best answer now based on what you've learned so far." }
@@ -901,13 +932,14 @@ module RailsConsoleAi
         total_output += result.output_tokens || 0
       end
+      last_llm_stats = result ? format_llm_stats(result) : nil
       final_result = Providers::ChatResult.new(
         text: result ? result.text : '',
         input_tokens: total_input,
         output_tokens: total_output,
         stop_reason: result ? result.stop_reason : :end_turn
       )
-      [final_result, new_messages]
+      [final_result, new_messages, last_llm_stats]
     end
     def track_usage(result)
@@ -928,6 +960,9 @@ module RailsConsoleAi
       parts = []
       parts << "in: #{input}" if input
+      cache_r = result.cache_read_input_tokens || 0
+      cache_w = result.cache_write_input_tokens || 0
+      parts << "cache r: #{cache_r} w: #{cache_w}" if cache_r > 0 || cache_w > 0
       parts << "out: #{output}" if output
       parts << "total: #{result.total_tokens}"
@@ -989,8 +1024,11 @@ module RailsConsoleAi
       when 'list_files'      then args['directory'] ? "(\"#{args['directory']}\")" : ''
       when 'save_memory'     then "(\"#{args['name']}\")"
       when 'delete_memory'   then "(\"#{args['name']}\")"
+      when 'recall_memory'   then "(\"#{args['name']}\")"
       when 'recall_memories' then args['query'] ? "(\"#{args['query']}\")" : ''
       when 'activate_skill' then "(\"#{args['name']}\")"
+      when 'save_skill'     then "(\"#{args['name']}\")"
+      when 'delete_skill'   then "(\"#{args['name']}\")"
       when 'recall_output'   then "(#{args['id']})"
       when 'execute_plan'
         steps = args['steps']
@@ -1013,8 +1051,10 @@ module RailsConsoleAi
         "#{result.scan(/^\s{2}\S/).length} columns"
       when 'describe_model'
         parts = []
+        col_count = result.scan(/^\s{2}\S+:\S+/).length
         assoc_count = result.scan(/^\s{2}(has_many|has_one|belongs_to|has_and_belongs_to_many)/).length
         val_count = result.scan(/^\s{2}(presence|uniqueness|format|length|numericality|inclusion|exclusion|confirmation|acceptance)/).length
+        parts << "#{col_count} columns" if col_count > 0
         parts << "#{assoc_count} associations" if assoc_count > 0
         parts << "#{val_count} validations" if val_count > 0
         parts.empty? ? truncate(result, 80) : parts.join(', ')
@@ -1034,9 +1074,27 @@ module RailsConsoleAi
         (result.start_with?('Memory saved') || result.start_with?('Memory updated')) ? result : truncate(result, 80)
       when 'delete_memory'
         result.start_with?('Memory deleted') ? result : truncate(result, 80)
+      when 'save_skill'
+        (result.start_with?('Skill created') || result.start_with?('Skill updated')) ? result : truncate(result, 80)
+      when 'delete_skill'
+        result.start_with?('Skill deleted') ? result : truncate(result, 80)
+      when 'recall_memory'
+        if result.start_with?('No memory found')
+          result
+        else
+          name = result[/\A\*\*(.+?)\*\*/, 1]
+          name ? "loaded: #{name}" : truncate(result, 80)
+        end
       when 'recall_memories'
-        chunks = result.split("\n\n")
-        chunks.length > 1 ? "#{chunks.length} memories found" : truncate(result, 80)
+        chunks = result.split("\n\n---\n\n")
+        names = chunks.map { |c| c[/\A\*\*(.+?)\*\*/, 1] }.compact
+        if names.length > 1
+          "#{names.length} memories found: #{names.join(', ')}"
+        elsif names.length == 1
+          "1 memory found: #{names.first}"
+        else
+          truncate(result, 80)
+        end
       when 'execute_plan'
         steps_done = result.scan(/^Step \d+/).length
         steps_done > 0 ? "#{steps_done} steps executed" : truncate(result, 80)
@@ -1062,36 +1120,103 @@ module RailsConsoleAi
       status
     end
+    # Provider-agnostic block detection helpers.
+    # Anthropic uses string keys: { 'type' => 'tool_result', ... }
+    # Bedrock uses symbol keys:   { tool_result: { ... } }
+    def tool_result_block?(block)
+      return false unless block.is_a?(Hash)
+      block['type'] == 'tool_result' || block.key?(:tool_result)
+    end
+    def tool_use_block?(block)
+      return false unless block.is_a?(Hash)
+      block['type'] == 'tool_use' || block.key?(:tool_use)
+    end
+    def tool_result_content(block)
+      if block['type'] == 'tool_result'
+        block['content'].to_s
+      elsif block.key?(:tool_result)
+        content = block[:tool_result][:content]
+        content.is_a?(Array) ? content.map { |c| c[:text].to_s }.join : content.to_s
+      else
+        ''
+      end
+    end
+    def tool_use_name(block)
+      if block['type'] == 'tool_use'
+        block['name']
+      elsif block.key?(:tool_use)
+        block[:tool_use][:name]
+      end
+    end
     def debug_pre_call(round, messages, system_prompt, tools, total_input, total_output)
       d = "\e[35m"
       r = "\e[0m"
+      $stderr.puts "#{d}[debug] ── LLM call ##{round + 1} ──#{r}"
+      conversation_summary(messages, system_prompt, tools, io: $stderr, prefix: "[debug]   ", d: d, r: r)
+      if total_input > 0 || total_output > 0
+        $stderr.puts "#{d}[debug]   tokens so far: in: #{format_tokens(total_input)} | out: #{format_tokens(total_output)}#{r}"
+      end
+      conversation_messages(messages, io: $stderr, prefix: "[debug]   ", d: d, r: r, show_pending: true)
+    end
+    def conversation_summary(messages, system_prompt, tools, io:, prefix:, d:, r:)
       user_msgs = 0; assistant_msgs = 0; tool_result_msgs = 0; tool_use_msgs = 0
-      output_msgs = 0; omitted_msgs = 0
+      output_msgs = 0; omitted_msgs = 0; expanded_msgs = 0
       total_content_chars = system_prompt.to_s.length
       messages.each do |msg|
         content_str = msg[:content].is_a?(Array) ? msg[:content].to_s : msg[:content].to_s
         total_content_chars += content_str.length
+        is_expanded = msg[:expanded] || (msg[:output_id] && @expanded_output_ids.include?(msg[:output_id]))
         role = msg[:role].to_s
         if role == 'tool'
           tool_result_msgs += 1
         elsif msg[:content].is_a?(Array)
+          has_tool_block = false
           msg[:content].each do |block|
             next unless block.is_a?(Hash)
-            if block['type'] == 'tool_result'
+            if tool_result_block?(block)
               tool_result_msgs += 1
-              omitted_msgs += 1 if block['content'].to_s.include?('Output omitted')
-            elsif block['type'] == 'tool_use'
+              has_tool_block = true
+              if is_expanded
+                expanded_msgs += 1
+              elsif tool_result_content(block).include?('Output omitted')
+                omitted_msgs += 1
+              end
+            elsif tool_use_block?(block)
               tool_use_msgs += 1
+              has_tool_block = true
+            end
+          end
+          unless has_tool_block
+            if role == 'user'
+              user_msgs += 1
+              if content_str.include?('Code was executed') || content_str.include?('directly executed code')
+                output_msgs += 1
+                if is_expanded
+                  expanded_msgs += 1
+                elsif content_str.include?('Output omitted')
+                  omitted_msgs += 1
+                end
+              end
+            elsif role == 'assistant'
+              assistant_msgs += 1
             end
           end
         elsif role == 'user'
           user_msgs += 1
           if content_str.include?('Code was executed') || content_str.include?('directly executed code')
             output_msgs += 1
-            omitted_msgs += 1 if content_str.include?('Output omitted')
+            if is_expanded
+              expanded_msgs += 1
+            elsif content_str.include?('Output omitted')
+              omitted_msgs += 1
+            end
           end
         elsif role == 'assistant'
           assistant_msgs += 1
@@ -1100,15 +1225,108 @@ module RailsConsoleAi
       tool_count = tools.respond_to?(:definitions) ? tools.definitions.length : 0
-      $stderr.puts "#{d}[debug] ── LLM call ##{round + 1} ──#{r}"
-      $stderr.puts "#{d}[debug]   system prompt: #{format_tokens(system_prompt.to_s.length)} chars#{r}"
-      $stderr.puts "#{d}[debug]   messages: #{messages.length} (#{user_msgs} user, #{assistant_msgs} assistant, #{tool_result_msgs} tool results, #{tool_use_msgs} tool calls)#{r}"
-      $stderr.puts "#{d}[debug]   execution outputs: #{output_msgs} (#{omitted_msgs} omitted)#{r}" if output_msgs > 0 || omitted_msgs > 0
-      $stderr.puts "#{d}[debug]   tools provided: #{tool_count}#{r}"
-      $stderr.puts "#{d}[debug]   est. content size: #{format_tokens(total_content_chars)} chars#{r}"
-      if total_input > 0 || total_output > 0
-        $stderr.puts "#{d}[debug]   tokens so far: in: #{format_tokens(total_input)} | out: #{format_tokens(total_output)}#{r}"
+      io.puts "#{d}#{prefix}system prompt: #{format_tokens(system_prompt.to_s.length)} chars#{r}"
+      io.puts "#{d}#{prefix}messages: #{messages.length} (#{user_msgs} user, #{assistant_msgs} assistant, #{tool_result_msgs} tool results, #{tool_use_msgs} tool calls)#{r}"
+      if output_msgs > 0 || omitted_msgs > 0 || expanded_msgs > 0
+        detail_parts = []
+        detail_parts << "#{omitted_msgs} omitted" if omitted_msgs > 0
+        detail_parts << "#{expanded_msgs} expanded" if expanded_msgs > 0
+        io.puts "#{d}#{prefix}execution outputs: #{output_msgs} (#{detail_parts.join(', ')})#{r}"
       end
+      io.puts "#{d}#{prefix}tools provided: #{tool_count}#{r}"
+      io.puts "#{d}#{prefix}est. content size: #{format_tokens(total_content_chars)} chars#{r}"
+    end
+    def conversation_messages(messages, io:, prefix:, d:, r:, show_pending: false)
+      io.puts "#{d}#{prefix}conversation:#{r}"
+      llm_call = 0
+      messages.each_with_index do |msg, i|
+        role = msg[:role].to_s
+        parts = []
+        display_role = role
+        is_assistant = role == 'assistant' || (msg[:content].is_a?(Array) && msg[:content].any? { |b| b.is_a?(Hash) && tool_use_block?(b) })
+        if is_assistant
+          llm_call += 1
+          stats = msg[:llm_stats] ? " → #{msg[:llm_stats]}" : ""
+          io.puts "#{d}#{prefix}  ── LLM call ##{llm_call}#{stats} ──#{r}"
+        end
+        if role == 'tool'
+          display_role = 'tool_result'
+          text = msg[:content].to_s
+          flags = debug_output_flags(text, msg)
+          flag_str = flags.any? ? ", #{flags.join(', ')}" : ""
+          parts << "#{text.length} chars#{flag_str}"
+        elsif msg[:content].is_a?(Array)
+          has_tool_result = false
+          has_tool_use = false
+          msg[:content].each do |block|
+            next unless block.is_a?(Hash)
+            if tool_result_block?(block)
+              has_tool_result = true
+              content = tool_result_content(block)
+              flags = debug_output_flags(content, msg)
+              flag_str = flags.any? ? ", #{flags.join(', ')}" : ""
+              parts << "#{content.length} chars#{flag_str}"
+            elsif tool_use_block?(block)
+              has_tool_use = true
+              parts << "tool_use: #{tool_use_name(block)}"
+            elsif block['type'] == 'text' || block.key?(:text)
+              text = block['text'] || block[:text]
+              parts << "text(#{text.to_s.length} chars)" if text.to_s.length > 0
+            end
+          end
+          display_role = 'tool_result' if has_tool_result && !has_tool_use
+          display_role = 'assistant' if has_tool_use && !has_tool_result
+        else
+          text = msg[:content].to_s
+          preview = text.length > 60 ? text[0, 57] + "..." : text
+          preview = preview.gsub("\n", "\\n")
+          flags = debug_output_flags(text, msg)
+          flag_str = flags.any? ? " (#{flags.join(', ')})" : ""
+          parts << "\"#{preview}\" #{text.length} chars#{flag_str}"
+        end
+        io.puts "#{d}#{prefix}  ##{i} #{display_role}: [#{parts.join(', ')}]#{r}"
+      end
+      if show_pending
+        llm_call += 1
+        io.puts "#{d}#{prefix}  ── LLM call ##{llm_call} (pending) ──#{r}"
+      end
+    end
+    def format_llm_stats(result)
+      parts = ["in: #{format_tokens(result.input_tokens || 0)}"]
+      parts << "out: #{format_tokens(result.output_tokens || 0)}"
+      cache_r = result.cache_read_input_tokens || 0
+      cache_w = result.cache_write_input_tokens || 0
+      parts << "cache r: #{format_tokens(cache_r)} w: #{format_tokens(cache_w)}" if cache_r > 0 || cache_w > 0
+      model = effective_model
+      pricing = Configuration::PRICING[model]
+      if pricing
+        cost = ((result.input_tokens || 0) * pricing[:input]) + ((result.output_tokens || 0) * pricing[:output])
+        if (cache_r > 0 || cache_w > 0) && pricing[:cache_read]
+          cost -= cache_r * pricing[:input]
+          cost += cache_r * pricing[:cache_read]
+          cost += cache_w * (pricing[:cache_write] - pricing[:input])
+        end
+        parts << "~$#{'%.4f' % cost}"
+      end
+      parts.join(' | ')
+    end
+    def debug_output_flags(content_text, msg)
+      flags = []
+      flags << "output ##{msg[:output_id]}" if msg[:output_id]
+      if msg[:expanded] || (msg[:output_id] && @expanded_output_ids.include?(msg[:output_id]))
+        flags << "expanded"
+      elsif content_text.include?('Output omitted')
+        flags << "omitted"
+      elsif (m = content_text.match(/Output truncated at (\S+) of (\S+) chars/))
+        flags << "truncated #{m[2]}→#{m[1]}"
+      end
+      flags
     end
     def debug_post_call(round, result, total_input, total_output)
@@ -1135,9 +1353,9 @@ module RailsConsoleAi
         end
         session_cost = (total_input * pricing[:input]) + (total_output * pricing[:output])
         parts << "~$#{'%.4f' % cost}"
-        $stderr.puts "#{d}[debug]   ← response: #{parts.join(' | ')}  (session: ~$#{'%.4f' % session_cost})#{r}"
+        $stderr.puts "\n#{d}[debug]   ← response: #{parts.join(' | ')}  (session: ~$#{'%.4f' % session_cost})#{r}"
       else
-        $stderr.puts "#{d}[debug]   ← response: #{parts.join(' | ')}#{r}"
+        $stderr.puts "\n#{d}[debug]   ← response: #{parts.join(' | ')}#{r}"
       end
       if result.tool_use?
@@ -1150,20 +1368,18 @@ module RailsConsoleAi
     # --- Conversation context management ---
-    def trim_old_outputs(messages)
-      output_indices = messages.each_with_index
-                               .select { |m, _| m[:output_id] }
-                               .map { |_, i| i }
-      return messages if output_indices.length <= RECENT_OUTPUTS_TO_KEEP
-      trim_indices = output_indices[0..-(RECENT_OUTPUTS_TO_KEEP + 1)]
-      messages.each_with_index.map do |msg, i|
-        if trim_indices.include?(i)
-          trim_message(msg)
-        else
-          msg
+    def trim_large_outputs(messages)
+      messages.map do |msg|
+        next msg unless msg[:output_id] && !msg[:do_not_trim]
+        # Re-expand messages that were expanded in a prior turn but lost content
+        # (because trim_message creates new hashes, disconnecting from @history)
+        if @expanded_output_ids.include?(msg[:output_id]) && !msg[:expanded]
+          expand_message_in_place(msg)
         end
+        next msg if msg[:expanded]
+        stored = @executor.recall_output(msg[:output_id])
+        next msg unless stored && stored.length > LARGE_OUTPUT_THRESHOLD
+        trim_message(msg)
       end
     end
@@ -1172,8 +1388,14 @@ module RailsConsoleAi
       if msg[:content].is_a?(Array)
         trimmed_content = msg[:content].map do |block|
-          if block.is_a?(Hash) && block['type'] == 'tool_result'
-            block.merge('content' => ref)
+          if block.is_a?(Hash) && tool_result_block?(block)
+            if block.key?(:tool_result)
+              # Bedrock format
+              block.merge(tool_result: block[:tool_result].merge(content: [{ text: ref }]))
+            else
+              # Anthropic format
+              block.merge('content' => ref)
+            end
           else
             block
           end
@@ -1191,40 +1413,40 @@ module RailsConsoleAi
       expanded = []
       messages.each do |msg|
         next unless msg[:output_id] && ids.include?(msg[:output_id])
-        full_output = @executor.recall_output(msg[:output_id])
-        next unless full_output
-        # Save original content so re_truncate_expanded can restore it
-        msg[:pre_expand_content] = msg[:content]
-        # Replace content with full output (handle Anthropic, OpenAI, and user message formats)
-        if msg[:content].is_a?(Array)
-          msg[:content] = msg[:content].map do |block|
-            if block.is_a?(Hash) && block['type'] == 'tool_result'
-              block.merge('content' => full_output)
-            else
-              block
-            end
-          end
-        elsif msg[:role].to_s == 'tool'
-          msg[:content] = full_output
-        else
-          # User messages (e.g., direct execution) — preserve first line, replace rest
-          first_line = msg[:content].to_s.lines.first&.chomp || ''
-          msg[:content] = "#{first_line}\n#{full_output}"
-        end
-        msg[:expanded] = true
+        next unless expand_message_in_place(msg)
         expanded << msg[:output_id]
       end
       expanded
     end
-    # Restore messages that were temporarily expanded back to their original
-    # (preview/truncated) content. Called after the LLM has seen the expanded
-    # content and responded.
-    def re_truncate_expanded(messages)
-      messages.each do |msg|
-        next unless msg.delete(:expanded)
-        msg[:content] = msg.delete(:pre_expand_content)
+    def expand_message_in_place(msg)
+      full_output = @executor.recall_output(msg[:output_id])
+      return false unless full_output
+      # Replace content with full output (handle Anthropic, Bedrock, and user message formats)
+      if msg[:content].is_a?(Array)
+        msg[:content] = msg[:content].map do |block|
+          if block.is_a?(Hash) && tool_result_block?(block)
+            if block.key?(:tool_result)
+              # Bedrock format
+              block.merge(tool_result: block[:tool_result].merge(content: [{ text: full_output }]))
+            else
+              # Anthropic format
+              block.merge('content' => full_output)
+            end
+          else
+            block
+          end
+        end
+      elsif msg[:role].to_s == 'tool'
+        msg[:content] = full_output
+      else
+        # User messages (e.g., direct execution) — preserve first line, replace rest
+        first_line = msg[:content].to_s.lines.first&.chomp || ''
+        msg[:content] = "#{first_line}\n#{full_output}"
       end
+      msg[:expanded] = true
+      @expanded_output_ids.add(msg[:output_id])
+      true
     end
     def extract_executed_code(history)
@@ -1246,11 +1468,11 @@ module RailsConsoleAi
         if msg[:role].to_s == 'assistant' && msg[:content].is_a?(Array)
           msg[:content].each do |block|
-            next unless block.is_a?(Hash) && block['type'] == 'tool_use' && block['name'] == 'execute_plan'
-            input = block['input'] || {}
+            next unless block.is_a?(Hash) && tool_use_block?(block) && tool_use_name(block) == 'execute_plan'
+            input = block['input'] || block.dig(:tool_use, :input) || {}
             steps = input['steps'] || []
-            tool_id = block['id']
+            tool_id = block['id'] || block.dig(:tool_use, :tool_use_id)
             result_msg = find_tool_result(history, tool_id)
             next unless result_msg
@@ -1274,14 +1496,15 @@ module RailsConsoleAi
     def find_tool_result(history, tool_id)
       history.each do |msg|
+        if msg[:role].to_s == 'tool' && msg[:tool_call_id] == tool_id
+          return msg[:content]
+        end
         next unless msg[:content].is_a?(Array)
         msg[:content].each do |block|
           next unless block.is_a?(Hash)
-          if block['type'] == 'tool_result' && block['tool_use_id'] == tool_id
-            return block['content']
-          end
-          if msg[:role].to_s == 'tool' && msg[:tool_call_id] == tool_id
-            return msg[:content]
+          if tool_result_block?(block)
+            block_tool_id = block['tool_use_id'] || block.dig(:tool_result, :tool_use_id)
+            return tool_result_content(block) if block_tool_id == tool_id
           end
         end
       end