RubyGems - brute - Versions diffs - 0.4.0 → 1.0.0 - Mend

brute 0.4.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72) hide show

checksums.yaml +4 -4
data/lib/brute/agent.rb +14 -0
data/lib/brute/diff.rb +24 -0
data/lib/brute/loop/agent_stream.rb +118 -0
data/lib/brute/loop/agent_turn.rb +520 -0
data/lib/brute/{compactor.rb → loop/compactor.rb} +2 -0
data/lib/brute/{doom_loop.rb → loop/doom_loop.rb} +2 -0
data/lib/brute/loop/step.rb +332 -0
data/lib/brute/loop/tool_call_step.rb +90 -0
data/lib/brute/middleware/compaction_check.rb +70 -23
data/lib/brute/middleware/doom_loop_detection.rb +110 -7
data/lib/brute/middleware/llm_call.rb +88 -1
data/lib/brute/middleware/message_tracking.rb +140 -10
data/lib/brute/middleware/otel/span.rb +32 -2
data/lib/brute/middleware/otel/token_usage.rb +38 -0
data/lib/brute/middleware/otel/tool_calls.rb +30 -1
data/lib/brute/middleware/otel/tool_results.rb +29 -1
data/lib/brute/middleware/otel.rb +5 -0
data/lib/brute/middleware/reasoning_normalizer.rb +94 -0
data/lib/brute/middleware/retry.rb +113 -1
data/lib/brute/middleware/session_persistence.rb +46 -3
data/lib/brute/middleware/token_tracking.rb +78 -0
data/lib/brute/middleware/tool_error_tracking.rb +128 -1
data/lib/brute/middleware/tool_use_guard.rb +64 -28
data/lib/brute/middleware/tracing.rb +63 -2
data/lib/brute/middleware.rb +18 -0
data/lib/brute/orchestrator/turn.rb +105 -0
data/lib/brute/patches/buffer_nil_guard.rb +5 -0
data/lib/brute/pipeline.rb +86 -7
data/lib/brute/prompts/build_switch.rb +29 -0
data/lib/brute/prompts/environment.rb +43 -0
data/lib/brute/prompts/identity.rb +29 -0
data/lib/brute/prompts/instructions.rb +21 -0
data/lib/brute/prompts/max_steps.rb +25 -0
data/lib/brute/prompts/plan_reminder.rb +25 -0
data/lib/brute/prompts/skills.rb +13 -0
data/lib/brute/prompts.rb +28 -0
data/lib/brute/providers/ollama.rb +135 -0
data/lib/brute/providers/opencode_go.rb +5 -0
data/lib/brute/providers/opencode_zen.rb +7 -2
data/lib/brute/providers/shell.rb +2 -2
data/lib/brute/providers/shell_response.rb +7 -2
data/lib/brute/providers.rb +62 -0
data/lib/brute/queue/base_queue.rb +222 -0
data/lib/brute/{file_mutation_queue.rb → queue/file_mutation_queue.rb} +28 -26
data/lib/brute/queue/parallel_queue.rb +66 -0
data/lib/brute/queue/sequential_queue.rb +63 -0
data/lib/brute/{message_store.rb → store/message_store.rb} +155 -62
data/lib/brute/store/session.rb +106 -0
data/lib/brute/{snapshot_store.rb → store/snapshot_store.rb} +2 -0
data/lib/brute/{todo_store.rb → store/todo_store.rb} +2 -0
data/lib/brute/system_prompt.rb +101 -0
data/lib/brute/tools/delegate.rb +59 -0
data/lib/brute/tools/fs_patch.rb +54 -2
data/lib/brute/tools/fs_read.rb +5 -0
data/lib/brute/tools/fs_remove.rb +7 -2
data/lib/brute/tools/fs_search.rb +5 -0
data/lib/brute/tools/fs_undo.rb +7 -2
data/lib/brute/tools/fs_write.rb +40 -2
data/lib/brute/tools/net_fetch.rb +5 -0
data/lib/brute/tools/question.rb +5 -0
data/lib/brute/tools/shell.rb +5 -0
data/lib/brute/tools/todo_read.rb +6 -1
data/lib/brute/tools/todo_write.rb +6 -1
data/lib/brute/tools.rb +31 -0
data/lib/brute/version.rb +1 -1
data/lib/brute.rb +40 -204
metadata +31 -20
data/lib/brute/agent_stream.rb +0 -63
data/lib/brute/hooks.rb +0 -84
data/lib/brute/orchestrator.rb +0 -391
data/lib/brute/session.rb +0 -161

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: e6fa4c53a825578634b110724522c021f089595e75e80faea05b5c53697010dd
-  data.tar.gz: 1cff09cf5e255928aada4f09a11c2f77ccf873839ee4f6d0ba24bc12beaefeba
+  metadata.gz: a3594fba62fc4a71baaaf36b878eb1a3c02a3c06f7b0b2517b434556468bcde5
+  data.tar.gz: d8f74e82c95d7698c11ecbe5792f57ace4739a81f4d625d3cb729123eb0b7179
 SHA512:
-  metadata.gz: 795a6b851f47daba23755f8791f98c4c54f1c738704748767e70ff0bf25b797dca15fc25892642b7b46c7f6c8acab83d5dd110b0741e4252e8e8b1ce8798ffa1
-  data.tar.gz: 827d9628e7d5142fe1eaabc5e3de47cf04468afa5e1985a9af6b7ccc16e471ce35236953d3b746e988ef34a779df3cd4b1e6821ca9cd45815fc302785d8d1a00
+  metadata.gz: 861ab5262a21c876fa6592d1fc22612c39aada6e33a30e35ce81adb1bbbdfa978b9dab7b31ce7d653bf0f8e8ed09256a37d3d50bbe0024b889c5583e1fb690b6
+  data.tar.gz: '082b158a7deec18b8ba1fededb03e7b1c08d7e744b03da7840d9e4af4234bb86b89d9266a560cc81e9d13a7f3a4bbca4f831a343fbaa18052acc3381075b4b8e'

data/lib/brute/agent.rb ADDED Viewed

@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+module Brute
+  class Agent # Plain configuration holder: bundles a provider, model, tool list and system prompt.
+    attr_reader :provider, :model, :tools, :system_prompt # read-only accessors; no writers are defined
+    def initialize(provider:, model:, tools: Brute::Tools::ALL, system_prompt: nil) # tools defaults to Brute::Tools::ALL, system_prompt to nil
+      @provider = provider
+      @model = model
+      @tools = tools
+      @system_prompt = system_prompt
+    end
+  end
+end

data/lib/brute/diff.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 # frozen_string_literal: true
+require "bundler/setup"
+require "brute"
 require 'diff/lcs'
 require 'diff/lcs/hunk'
@@ -24,3 +26,25 @@ module Brute
     end
   end
 end
+test do # Inline specs for Brute::Diff.unified(old, new); the test/it/should DSL is defined outside this view.
+  it "generates a unified diff for changed content" do
+    Brute::Diff.unified("line1\nold\nline3\n", "line1\nnew\nline3\n").should =~ /\-old/ # removed line is marked with "-"
+  end
+  it "includes additions in diff" do
+    Brute::Diff.unified("line1\nold\nline3\n", "line1\nnew\nline3\n").should =~ /\+new/ # added line is marked with "+"
+  end
+  it "returns empty string for identical content" do
+    Brute::Diff.unified("same\ncontent\n", "same\ncontent\n").should == ""
+  end
+  it "handles empty old content (new file)" do
+    Brute::Diff.unified("", "new\ncontent\n").should =~ /\+new/
+  end
+  it "handles empty new content (deleted file)" do
+    Brute::Diff.unified("old\ncontent\n", "").should =~ /\-old/
+  end
+end

data/lib/brute/loop/agent_stream.rb ADDED Viewed

@@ -0,0 +1,118 @@
+# frozen_string_literal: true
+require "bundler/setup"
+require "brute"
+module Brute
+  module Loop
+  # Bridges llm.rb's streaming callbacks to the host application.
+  #
+  # Text and reasoning chunks fire immediately as the LLM generates them.
+  # Tool calls are collected but NOT executed — execution is deferred to the
+  # agent loop after the stream completes. This ensures text is never
+  # concurrent with tool execution.
+  #
+  # After the stream finishes, the agent loop (AgentTurn::Base#perform) reads
+  # +pending_tools+, fires +on_tool_call_start+ once with the full batch, then
+  # runs "question" tools sequentially and the remaining tools via a parallel queue.
+  #
+  class AgentStream < LLM::Stream
+    # Tool call metadata recorded during streaming, used by ToolUseGuard
+    # when ctx.functions is empty (nil-choice bug in llm.rb).
+    attr_reader :pending_tool_calls
+    # Deferred tool/error pairs: [(LLM::Function, error_or_nil), ...]
+    # The agent loop reads these after the stream completes.
+    attr_reader :pending_tools
+    def initialize(on_content: nil, on_reasoning: nil, on_question: nil) # every callback is optional
+      @on_content = on_content
+      @on_reasoning = on_reasoning
+      @on_question = on_question
+      @pending_tool_calls = []
+      @pending_tools = []
+    end
+    # The on_question callback, needed by the agent loop to set
+    # thread/fiber-locals before tool execution.
+    attr_reader :on_question
+    def on_content(text)
+      @on_content&.call(text) # no-op when no on_content callback was given
+    end
+    def on_reasoning_content(text)
+      @on_reasoning&.call(text) # no-op when no on_reasoning callback was given
+    end
+    # Called by llm.rb per tool as it arrives during streaming.
+    # Records only — no execution, no threads, no queue pushes.
+    def on_tool_call(tool, error)
+      @pending_tool_calls << { id: tool.id, name: tool.name, arguments: tool.arguments } # metadata snapshot only
+      @pending_tools << [tool, error] # the tool object paired with its error (nil when there is none)
+    end
+    # Clear only the tool call metadata (used by ToolUseGuard after it
+    # has consumed the data for synthetic message injection).
+    def clear_pending_tool_calls!
+      @pending_tool_calls.clear
+    end
+    # Clear the deferred execution queue after the agent loop has
+    # consumed and dispatched all tool calls.
+    def clear_pending_tools!
+      @pending_tools.clear
+    end
+  end
+  end
+end
+test do # Inline specs for Brute::Loop::AgentStream; the test/it/should DSL is defined outside this view.
+  FakeTool = Struct.new(:id, :name, :arguments, keyword_init: true) # stand-in exposing the three readers on_tool_call uses
+  it "records tool in pending_tools" do
+    stream = Brute::Loop::AgentStream.new
+    tool = FakeTool.new(id: "toolu_1", name: "read", arguments: {})
+    stream.on_tool_call(tool, nil)
+    stream.pending_tools.size.should == 1
+  end
+  it "records tool call metadata" do
+    stream = Brute::Loop::AgentStream.new
+    tool = FakeTool.new(id: "toolu_abc", name: "read", arguments: { "file_path" => "test.rb" })
+    stream.on_tool_call(tool, nil)
+    stream.pending_tool_calls.first[:id].should == "toolu_abc"
+  end
+  it "records multiple tool calls" do
+    stream = Brute::Loop::AgentStream.new
+    t1 = FakeTool.new(id: "toolu_1", name: "read", arguments: {})
+    t2 = FakeTool.new(id: "toolu_2", name: "write", arguments: {})
+    stream.on_tool_call(t1, nil)
+    stream.on_tool_call(t2, nil)
+    stream.pending_tool_calls.size.should == 2
+  end
+  it "clears pending tool calls and tools" do
+    stream = Brute::Loop::AgentStream.new
+    tool = FakeTool.new(id: "toolu_1", name: "read", arguments: {})
+    stream.on_tool_call(tool, nil)
+    stream.clear_pending_tool_calls!
+    stream.clear_pending_tools!
+    stream.pending_tool_calls.should.be.empty # NOTE(review): pending_tools is cleared above but never asserted empty
+  end
+  it "fires the content callback" do
+    received = nil
+    stream = Brute::Loop::AgentStream.new(on_content: ->(text) { received = text })
+    stream.on_content("hello")
+    received.should == "hello"
+  end
+  it "fires the reasoning callback" do
+    received = nil
+    stream = Brute::Loop::AgentStream.new(on_reasoning: ->(text) { received = text })
+    stream.on_reasoning_content("thinking...")
+    received.should == "thinking..."
+  end
+end

data/lib/brute/loop/agent_turn.rb ADDED Viewed

@@ -0,0 +1,520 @@
+# frozen_string_literal: true
+require "bundler/setup"
+require "brute"
+module Brute
+  module Loop
+  # Factory + namespace for provider-specific agent turns.
+  #
+  # An agent turn sends a message to the LLM, iterates over tool calls
+  # until there are none left, and returns the response. Each turn has
+  # its own job queue for tool execution (ParallelQueue of ToolCallSteps).
+  #
+  # Usage:
+  #
+  #   step = AgentTurn.perform(agent:, session:, pipeline:, input:)
+  #
+  # AgentTurn.perform detects the provider from the agent and returns
+  # the appropriate provider-specific Step subclass, already executed.
+  # The returned step has .state, .result, .error, etc.
+  #
+  # Provider-specific subclasses live under AgentTurn:: and override
+  # supported_messages to filter the session's message history per
+  # provider capability.
+  #
+  module AgentTurn
+    # Build and return the right AgentTurn step for this agent's provider.
+    # Does NOT execute it — call step.call(task) yourself, or enqueue it.
+    def self.new(agent:, session:, pipeline:, input: nil, callbacks: {}, **rest)
+      klass = detect(agent.provider)
+      klass.new(agent: agent, session: session, pipeline: pipeline, input: input, callbacks: callbacks, **rest)
+    end
+    # Build, execute inside a Sync block, return the finished step.
+    def self.perform(agent:, session:, pipeline:, input: nil, callbacks: {}, **rest)
+      step = self.new(agent: agent, session: session, pipeline: pipeline, input: input, callbacks: callbacks, **rest)
+      Sync do
+        step.call(Async::Task.current)
+      end
+      step # the step itself is returned, not the value of the Sync block
+    end
+    # Detect the right subclass from the provider.
+    def self.detect(provider)
+      if provider
+        provider.class.name.to_s.downcase.then do |class_name| # case-insensitive substring match on the class name
+          if class_name.include?("anthropic")
+            Anthropic
+          elsif class_name.include?("openai")
+            OpenAI
+          elsif class_name.include?("google") || class_name.include?("gemini")
+            Google
+          else
+            Base
+          end
+        end
+      else
+        Base # a nil provider falls back to the default implementation
+      end
+    end
+    # The default implementation. Works for any provider.
+    # Provider-specific subclasses override supported_messages
+    # and anything else that differs.
+    #
+    # LLM::Context is built fresh for each pipeline call by the LLMCall
+    # middleware. The agent turn owns the conversation state via
+    # env[:messages] (an Array<LLM::Message>).
+    #
+    # Supports two modes:
+    #
+    #   Non-streaming (default): text arrives after the LLM call completes,
+    #   on_content fires post-hoc via LLMCall middleware, tool calls come
+    #   from env[:pending_functions].
+    #
+    #   Streaming: enabled when on_content or on_reasoning callbacks are
+    #   present. Text/reasoning fire incrementally via AgentStream. Tool
+    #   calls are deferred during the stream and collected afterward from
+    #   the stream's pending_tools.
+    #
+    # Callbacks:
+    #
+    #   on_content:         ->(text) {}     # text chunk (streaming) or full text (non-streaming)
+    #   on_reasoning:       ->(text) {}     # reasoning/thinking chunk (streaming only)
+    #   on_tool_call_start: ->(batch) {}    # [{name:, arguments:}, ...] before tool execution
+    #   on_tool_result:     ->(name, r) {}  # per-tool, after each completes
+    #   on_question:        ->(questions, queue) {}  # interactive; push answers onto queue
+    #
+    class Base < Step
+      MAX_ITERATIONS = 100 # cap on tool-call round trips per turn; enforced by the while guard in #perform
+      attr_reader :agent, :session
+      def initialize(agent:, session:, pipeline:, input: nil, callbacks: {}, **rest)
+        super(**rest) # remaining keywords go to Step#initialize (not visible in this file)
+        @agent     = agent
+        @session   = session
+        @pipeline  = pipeline
+        @input     = input
+        @callbacks = callbacks
+        # Create streaming bridge when content or reasoning callbacks are
+        # present. The stream is passed into env so LLMCall can wire it
+        # into each fresh LLM::Context.
+        if @callbacks[:on_content] || @callbacks[:on_reasoning]
+          @stream = AgentStream.new(
+            on_content:   @callbacks[:on_content],
+            on_reasoning: @callbacks[:on_reasoning],
+            on_question:  @callbacks[:on_question],
+          )
+        end
+      end
+      def perform(task) # runs the turn and returns the last pipeline response; task is not referenced in this body
+        env = build_env
+        # First LLM call
+        env[:input] = build_initial_input(@input)
+        env[:tool_results] = nil
+        response = @pipeline.call(env)
+        iterations = 0
+        while !env[:should_exit] &&
+          (pending = collect_pending_tools(env)).any? &&
+          iterations < MAX_ITERATIONS # NOTE(review): checked after collect_pending_tools has consumed and cleared the calls, so at the cap they are dropped unanswered
+          # Fire on_tool_call_start with the full batch
+          @callbacks[:on_tool_call_start]&.call(
+            pending.map { |fn, _| { name: fn.name, arguments: fn.arguments } }
+          )
+          # Partition: question tools run sequentially on this fiber,
+          # all others run in parallel via the sub-queue.
+          questions, others = pending.partition { |fn, _| fn.name == "question" }
+          results = []
+          # Questions first — sequential, blocking, with on_question fiber-local
+          questions.each do |fn, err|
+            if err
+              @callbacks[:on_tool_result]&.call(err.name, result_value(err)) # assumes the error object responds to #name — TODO confirm
+              results << err
+            else
+              Thread.current[:on_question] = @callbacks[:on_question] # NOTE(review): never reset afterwards in this method
+              result = fn.call
+              @callbacks[:on_tool_result]&.call(fn.name, result_value(result))
+              results << result
+            end
+          end
+          # Others — into the parallel queue
+          if others.any?
+            errors, executable = others.partition { |_, err| err }
+            # Record pre-existing errors (from stream's on_tool_call)
+            errors.each do |_, err|
+              @callbacks[:on_tool_result]&.call(err.name, result_value(err))
+              results << err
+            end
+            if executable.any?
+              tool_steps = executable.map { |fn, _| ToolCallStep.new(function: fn) }
+              tool_steps.each { |s| jobs(type: Brute::Queue::ParallelQueue) << s } # jobs is not defined in this class; presumably inherited from Step
+              jobs.drain
+              tool_steps.each do |s|
+                val = s.state == :completed ? s.result : s.error # a step that did not complete contributes its error instead
+                @callbacks[:on_tool_result]&.call(s.function.name, result_value(val))
+                results << val
+              end
+            end
+          end
+          # Feed results back to LLM
+          env[:input] = results
+          env[:tool_results] = results.filter_map { |r| # the block always returns an array, so this behaves like map
+            name = r.respond_to?(:name) ? r.name : "unknown"
+            [name, result_value(r)]
+          }
+          response = @pipeline.call(env)
+          # Drop the sub-queue so the next iteration's tool calls get a fresh one (presumably rebuilt lazily by Step#jobs — confirm)
+          @mutex.synchronize { @jobs = nil } # @mutex and @jobs are not set in this class; presumably owned by Step
+          iterations += 1
+        end
+        response
+      end
+      # Override in subclasses to filter message types per provider.
+      # Default: all messages pass through.
+      def supported_messages(messages)
+        messages
+      end
+      private
+      def build_env # fresh env hash handed to the pipeline for this turn
+        {
+          provider:          @agent.provider,
+          model:             @agent.model,
+          input:             nil,
+          tools:             @agent.tools,
+          messages:          [],
+          stream:            @stream,
+          params:            {},
+          metadata:          {},
+          tool_results:      nil,
+          streaming:         !!@stream,
+          callbacks:         @callbacks,
+          should_exit:       nil,
+          pending_functions: [],
+        }
+      end
+      def build_initial_input(user_message) # prompt with the system and user parts, each added only when present
+        sys = @agent.system_prompt
+        LLM::Prompt.new(@agent.provider) do |p|
+          p.system(sys) if sys
+          p.user(user_message) if user_message
+        end
+      end
+      # Collect pending tool calls from the stream (streaming mode) or
+      # from env[:pending_functions] (set by LLMCall after each call).
+      #
+      # Returns [(function, error_or_nil), ...] pairs.
+      # Clears whichever source was consumed (stream or env[:pending_functions]).
+      def collect_pending_tools(env)
+        if @stream&.pending_tools&.any?
+          @stream.pending_tools.dup.tap { @stream.clear_pending_tools! } # copy first, then clear the stream's list
+        elsif env[:pending_functions]&.any?
+          env[:pending_functions].dup.tap { env[:pending_functions] = [] }.map { |fn| [fn, nil] } # non-streaming calls carry no error
+        else
+          []
+        end
+      end
+      def result_value(result) # unwraps objects that respond to #value; anything else is returned unchanged
+        result.respond_to?(:value) ? result.value : result
+      end
+    end
+    # Provider-specific subclasses. Override supported_messages
+    # or loop behavior as needed.
+    class Anthropic < Base
+    end
+    class OpenAI < Base
+    end
+    class Google < Base
+    end
+  end
+  end
+end
+test do # Inline specs for Brute::Loop::AgentTurn; the test/it/should DSL is defined outside this view.
+  require_relative "../../../spec/support/mock_provider"
+  require_relative "../../../spec/support/mock_response"
+  class RecordingPipeline # pipeline double: records each env[:input] and replays canned responses
+    attr_reader :calls
+    def initialize(responses: [])
+      @responses = responses
+      @calls = []
+      @index = 0
+    end
+    def call(env)
+      @calls << env[:input]
+      resp = @responses[@index] || @responses.last # repeats the last response once the list is exhausted; nil when empty
+      @index += 1
+      resp
+    end
+  end
+  FakeResponse = Struct.new(:content)
+  def make_agent(provider: MockProvider.new, tools: [])
+    Brute::Agent.new(provider: provider, model: nil, tools: tools)
+  end
+  # -- factory detection --
+  it "detects Base for unknown providers" do
+    Brute::Loop::AgentTurn.detect(MockProvider.new).should == Brute::Loop::AgentTurn::Base
+  end
+  it "detects Anthropic from provider class name" do
+    provider = MockProvider.new
+    def provider.class; Class.new { def self.name; "LLM::Anthropic"; end }; end # stub #class so detect sees this name
+    Brute::Loop::AgentTurn.detect(provider).should == Brute::Loop::AgentTurn::Anthropic
+  end
+  it "detects OpenAI from provider class name" do
+    provider = MockProvider.new
+    def provider.class; Class.new { def self.name; "LLM::OpenAI"; end }; end
+    Brute::Loop::AgentTurn.detect(provider).should == Brute::Loop::AgentTurn::OpenAI
+  end
+  it "detects Google from provider class name" do
+    provider = MockProvider.new
+    def provider.class; Class.new { def self.name; "LLM::Google"; end }; end
+    Brute::Loop::AgentTurn.detect(provider).should == Brute::Loop::AgentTurn::Google
+  end
+  # -- AgentTurn.new returns the right subclass --
+  it "returns Base instance for unknown provider" do
+    step = Brute::Loop::AgentTurn.new(
+      agent: make_agent,
+      session: Brute::Store::Session.new,
+      pipeline: RecordingPipeline.new(responses: []),
+      input: "hi",
+    )
+    step.should.be.kind_of Brute::Loop::AgentTurn::Base
+  end
+  # -- basic turn execution --
+  it "calls the pipeline" do
+    Sync do
+      pipeline = RecordingPipeline.new(responses: [FakeResponse.new("hello")])
+      step = Brute::Loop::AgentTurn.new(
+        agent: make_agent,
+        session: Brute::Store::Session.new,
+        pipeline: pipeline,
+        input: "hi",
+      )
+      step.call(Async::Task.current)
+      pipeline.calls.size.should == 1
+    end
+  end
+  it "returns the LLM response as result" do
+    Sync do
+      pipeline = RecordingPipeline.new(responses: [FakeResponse.new("world")])
+      step = Brute::Loop::AgentTurn.new(
+        agent: make_agent,
+        session: Brute::Store::Session.new,
+        pipeline: pipeline,
+        input: "hi",
+      )
+      step.call(Async::Task.current)
+      step.result.content.should == "world"
+    end
+  end
+  it "transitions to completed" do
+    Sync do
+      pipeline = RecordingPipeline.new(responses: [FakeResponse.new("ok")])
+      step = Brute::Loop::AgentTurn.new(
+        agent: make_agent,
+        session: Brute::Store::Session.new,
+        pipeline: pipeline,
+        input: "hi",
+      )
+      step.call(Async::Task.current)
+      step.state.should == :completed
+    end
+  end
+  # -- AgentTurn.perform convenience --
+  it "perform returns a completed step" do
+    pipeline = RecordingPipeline.new(responses: [FakeResponse.new("done")])
+    step = Brute::Loop::AgentTurn.perform(
+      agent: make_agent,
+      session: Brute::Store::Session.new,
+      pipeline: pipeline,
+      input: "hi",
+    )
+    step.state.should == :completed
+  end
+  # -- cancellation --
+  it "is cancellable when pending" do
+    step = Brute::Loop::AgentTurn.new(
+      agent: Brute::Agent.new(provider: nil, model: nil, tools: []),
+      session: Brute::Store::Session.new,
+      pipeline: RecordingPipeline.new(responses: []),
+      input: "hi",
+    )
+    step.cancel
+    step.state.should == :cancelled
+  end
+  # -- system prompt from agent --
+  it "uses agent system_prompt" do
+    Sync do
+      agent = Brute::Agent.new(
+        provider: MockProvider.new,
+        model: nil,
+        tools: [],
+        system_prompt: "You are a test bot",
+      )
+      pipeline = RecordingPipeline.new(responses: [FakeResponse.new("ok")])
+      step = Brute::Loop::AgentTurn.new(
+        agent: agent,
+        session: Brute::Store::Session.new,
+        pipeline: pipeline,
+        input: "hi",
+      )
+      step.call(Async::Task.current)
+      step.state.should == :completed # NOTE(review): only completion is asserted; the prompt content is never inspected
+    end
+  end
+  # -- should_exit loop break --
+  # A mock function that satisfies ToolCallStep's interface.
+  LoopTestFunction = Struct.new(:id, :name, :arguments, keyword_init: true) do
+    def call; self; end
+    def value; "tool_result"; end
+  end
+  # Pipeline that injects pending_functions and optionally sets should_exit.
+  class ShouldExitPipeline
+    attr_reader :call_count
+    def initialize(exit_on_call: nil)
+      @exit_on_call = exit_on_call
+      @call_count = 0
+      @fn = LoopTestFunction.new(id: "call_1", name: "test_tool", arguments: "{}")
+    end
+    def call(env)
+      @call_count += 1
+      # Always give pending functions so the loop would continue.
+      env[:pending_functions] = [@fn]
+      if @exit_on_call && @call_count >= @exit_on_call # set on the Nth call and on every call after it
+        env[:should_exit] = {
+          reason:  "test_exit",
+          message: "forced exit for test",
+          source:  "ShouldExitPipeline",
+        }
+      end
+      FakeResponse.new("response #{@call_count}")
+    end
+  end
+  it "breaks the loop when should_exit is set on the initial call" do
+    Sync do
+      pipeline = ShouldExitPipeline.new(exit_on_call: 1)
+      step = Brute::Loop::AgentTurn.new(
+        agent: make_agent,
+        session: Brute::Store::Session.new,
+        pipeline: pipeline,
+        input: "hi",
+      )
+      step.call(Async::Task.current)
+      # Pipeline called once (initial call). The loop never entered
+      # because should_exit was set before the while guard.
+      pipeline.call_count.should == 1
+      step.state.should == :completed
+    end
+  end
+  it "breaks the loop mid-iteration when should_exit is set" do
+    Sync do
+      # exit_on_call: 2 means the first call returns tools (loop enters),
+      # the second call (inside the loop) sets should_exit.
+      pipeline = ShouldExitPipeline.new(exit_on_call: 2)
+      step = Brute::Loop::AgentTurn.new(
+        agent: make_agent,
+        session: Brute::Store::Session.new,
+        pipeline: pipeline,
+        input: "hi",
+      )
+      step.call(Async::Task.current)
+      # Two calls: initial + one loop iteration. The loop did not
+      # continue to a third call because should_exit was set.
+      pipeline.call_count.should == 2
+      step.state.should == :completed
+    end
+  end
+  it "loops normally when should_exit is not set" do
+    Sync do
+      call_count = 0
+      fn = LoopTestFunction.new(id: "call_1", name: "test_tool", arguments: "{}")
+      pipeline_obj = Object.new
+      pipeline_obj.define_singleton_method(:call_count) { call_count }
+      pipeline_obj.define_singleton_method(:call) do |env|
+        call_count += 1
+        if call_count <= 3
+          env[:pending_functions] = [fn]
+        else
+          env[:pending_functions] = []
+        end
+        FakeResponse.new("response #{call_count}")
+      end
+      step = Brute::Loop::AgentTurn.new(
+        agent: make_agent,
+        session: Brute::Store::Session.new,
+        pipeline: pipeline_obj,
+        input: "hi",
+      )
+      step.call(Async::Task.current)
+      # Call 1 (initial) → pending_functions has fn → loop enters
+      # Loop iter 1: execute tools, call pipeline (call 2) → still has fn → continues
+      # Loop iter 2: execute tools, call pipeline (call 3) → still has fn → continues
+      # Loop iter 3: execute tools, call pipeline (call 4) → empty → exits
+      call_count.should == 4
+      step.state.should == :completed
+    end
+  end
+end