brute 0.4.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/lib/brute/agent.rb +14 -0
  3. data/lib/brute/diff.rb +24 -0
  4. data/lib/brute/loop/agent_stream.rb +118 -0
  5. data/lib/brute/loop/agent_turn.rb +520 -0
  6. data/lib/brute/{compactor.rb → loop/compactor.rb} +2 -0
  7. data/lib/brute/{doom_loop.rb → loop/doom_loop.rb} +2 -0
  8. data/lib/brute/loop/step.rb +332 -0
  9. data/lib/brute/loop/tool_call_step.rb +90 -0
  10. data/lib/brute/middleware/compaction_check.rb +70 -23
  11. data/lib/brute/middleware/doom_loop_detection.rb +110 -7
  12. data/lib/brute/middleware/llm_call.rb +88 -1
  13. data/lib/brute/middleware/message_tracking.rb +140 -10
  14. data/lib/brute/middleware/otel/span.rb +32 -2
  15. data/lib/brute/middleware/otel/token_usage.rb +38 -0
  16. data/lib/brute/middleware/otel/tool_calls.rb +30 -1
  17. data/lib/brute/middleware/otel/tool_results.rb +29 -1
  18. data/lib/brute/middleware/otel.rb +5 -0
  19. data/lib/brute/middleware/reasoning_normalizer.rb +94 -0
  20. data/lib/brute/middleware/retry.rb +113 -1
  21. data/lib/brute/middleware/session_persistence.rb +46 -3
  22. data/lib/brute/middleware/token_tracking.rb +78 -0
  23. data/lib/brute/middleware/tool_error_tracking.rb +128 -1
  24. data/lib/brute/middleware/tool_use_guard.rb +64 -28
  25. data/lib/brute/middleware/tracing.rb +63 -2
  26. data/lib/brute/middleware.rb +18 -0
  27. data/lib/brute/orchestrator/turn.rb +105 -0
  28. data/lib/brute/patches/buffer_nil_guard.rb +5 -0
  29. data/lib/brute/pipeline.rb +86 -7
  30. data/lib/brute/prompts/build_switch.rb +29 -0
  31. data/lib/brute/prompts/environment.rb +43 -0
  32. data/lib/brute/prompts/identity.rb +29 -0
  33. data/lib/brute/prompts/instructions.rb +21 -0
  34. data/lib/brute/prompts/max_steps.rb +25 -0
  35. data/lib/brute/prompts/plan_reminder.rb +25 -0
  36. data/lib/brute/prompts/skills.rb +13 -0
  37. data/lib/brute/prompts.rb +28 -0
  38. data/lib/brute/providers/ollama.rb +135 -0
  39. data/lib/brute/providers/opencode_go.rb +5 -0
  40. data/lib/brute/providers/opencode_zen.rb +7 -2
  41. data/lib/brute/providers/shell.rb +2 -2
  42. data/lib/brute/providers/shell_response.rb +7 -2
  43. data/lib/brute/providers.rb +62 -0
  44. data/lib/brute/queue/base_queue.rb +222 -0
  45. data/lib/brute/{file_mutation_queue.rb → queue/file_mutation_queue.rb} +28 -26
  46. data/lib/brute/queue/parallel_queue.rb +66 -0
  47. data/lib/brute/queue/sequential_queue.rb +63 -0
  48. data/lib/brute/{message_store.rb → store/message_store.rb} +155 -62
  49. data/lib/brute/store/session.rb +106 -0
  50. data/lib/brute/{snapshot_store.rb → store/snapshot_store.rb} +2 -0
  51. data/lib/brute/{todo_store.rb → store/todo_store.rb} +2 -0
  52. data/lib/brute/system_prompt.rb +101 -0
  53. data/lib/brute/tools/delegate.rb +59 -0
  54. data/lib/brute/tools/fs_patch.rb +54 -2
  55. data/lib/brute/tools/fs_read.rb +5 -0
  56. data/lib/brute/tools/fs_remove.rb +7 -2
  57. data/lib/brute/tools/fs_search.rb +5 -0
  58. data/lib/brute/tools/fs_undo.rb +7 -2
  59. data/lib/brute/tools/fs_write.rb +40 -2
  60. data/lib/brute/tools/net_fetch.rb +5 -0
  61. data/lib/brute/tools/question.rb +5 -0
  62. data/lib/brute/tools/shell.rb +5 -0
  63. data/lib/brute/tools/todo_read.rb +6 -1
  64. data/lib/brute/tools/todo_write.rb +6 -1
  65. data/lib/brute/tools.rb +31 -0
  66. data/lib/brute/version.rb +1 -1
  67. data/lib/brute.rb +40 -204
  68. metadata +31 -20
  69. data/lib/brute/agent_stream.rb +0 -63
  70. data/lib/brute/hooks.rb +0 -84
  71. data/lib/brute/orchestrator.rb +0 -391
  72. data/lib/brute/session.rb +0 -161
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e6fa4c53a825578634b110724522c021f089595e75e80faea05b5c53697010dd
4
- data.tar.gz: 1cff09cf5e255928aada4f09a11c2f77ccf873839ee4f6d0ba24bc12beaefeba
3
+ metadata.gz: a3594fba62fc4a71baaaf36b878eb1a3c02a3c06f7b0b2517b434556468bcde5
4
+ data.tar.gz: d8f74e82c95d7698c11ecbe5792f57ace4739a81f4d625d3cb729123eb0b7179
5
5
  SHA512:
6
- metadata.gz: 795a6b851f47daba23755f8791f98c4c54f1c738704748767e70ff0bf25b797dca15fc25892642b7b46c7f6c8acab83d5dd110b0741e4252e8e8b1ce8798ffa1
7
- data.tar.gz: 827d9628e7d5142fe1eaabc5e3de47cf04468afa5e1985a9af6b7ccc16e471ce35236953d3b746e988ef34a779df3cd4b1e6821ca9cd45815fc302785d8d1a00
6
+ metadata.gz: 861ab5262a21c876fa6592d1fc22612c39aada6e33a30e35ce81adb1bbbdfa978b9dab7b31ce7d653bf0f8e8ed09256a37d3d50bbe0024b889c5583e1fb690b6
7
+ data.tar.gz: '082b158a7deec18b8ba1fededb03e7b1c08d7e744b03da7840d9e4af4234bb86b89d9266a560cc81e9d13a7f3a4bbca4f831a343fbaa18052acc3381075b4b8e'
data/lib/brute/agent.rb ADDED
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
module Brute
  # Read-only description of an agent: the provider and model it talks to,
  # the tools it is allowed to call, and an optional system prompt.
  class Agent
    attr_reader :provider, :model, :tools, :system_prompt

    # provider:      the LLM provider object (stored as given)
    # model:         the model identifier (stored as given)
    # tools:         tool list; defaults to Brute::Tools::ALL when omitted
    # system_prompt: optional system prompt; nil when omitted
    def initialize(provider:, model:, tools: Brute::Tools::ALL, system_prompt: nil)
      @provider, @model = provider, model
      @tools, @system_prompt = tools, system_prompt
    end
  end
end
data/lib/brute/diff.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
4
+ require "brute"
3
5
  require 'diff/lcs'
4
6
  require 'diff/lcs/hunk'
5
7
 
@@ -24,3 +26,25 @@ module Brute
24
26
  end
25
27
  end
26
28
  end
29
+
30
# Inline specs for Brute::Diff.unified: changed, identical, created and
# deleted content.
test do
  before_text = "line1\nold\nline3\n"
  after_text = "line1\nnew\nline3\n"
  unchanged_text = "same\ncontent\n"

  it "generates a unified diff for changed content" do
    patch = Brute::Diff.unified(before_text, after_text)
    patch.should =~ /\-old/
  end

  it "includes additions in diff" do
    patch = Brute::Diff.unified(before_text, after_text)
    patch.should =~ /\+new/
  end

  it "returns empty string for identical content" do
    patch = Brute::Diff.unified(unchanged_text, unchanged_text)
    patch.should == ""
  end

  it "handles empty old content (new file)" do
    patch = Brute::Diff.unified("", "new\ncontent\n")
    patch.should =~ /\+new/
  end

  it "handles empty new content (deleted file)" do
    patch = Brute::Diff.unified("old\ncontent\n", "")
    patch.should =~ /\-old/
  end
end
data/lib/brute/loop/agent_stream.rb ADDED
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/setup"
4
+ require "brute"
5
+
6
module Brute
  module Loop
    # Adapter between llm.rb's streaming callbacks and the host application.
    #
    # Content and reasoning chunks are forwarded to the host's callbacks as
    # soon as they arrive. Tool calls are only recorded here — running them is
    # left to the agent loop once the stream has finished, so text output is
    # never concurrent with tool execution.
    #
    # After the stream completes, the agent loop reads +pending_tools+,
    # dispatches the whole batch, and fires +on_tool_call_start+ once for it.
    #
    class AgentStream < LLM::Stream
      # pending_tool_calls: metadata hashes ({id:, name:, arguments:}) recorded
      #   while streaming; used by ToolUseGuard when ctx.functions is empty
      #   (nil-choice bug in llm.rb).
      # pending_tools: deferred [(LLM::Function, error_or_nil), ...] pairs that
      #   the agent loop consumes after the stream completes.
      # on_question: the question callback, exposed so the agent loop can set
      #   thread/fiber-locals before tool execution.
      attr_reader :pending_tool_calls, :pending_tools, :on_question

      def initialize(on_content: nil, on_reasoning: nil, on_question: nil)
        @pending_tool_calls = []
        @pending_tools = []
        @on_content, @on_reasoning, @on_question = on_content, on_reasoning, on_question
      end

      # Forward a text chunk to the host callback, if one was given.
      def on_content(text)
        handler = @on_content
        handler.call(text) unless handler.nil?
      end

      # Forward a reasoning chunk to the host callback, if one was given.
      def on_reasoning_content(text)
        handler = @on_reasoning
        handler.call(text) unless handler.nil?
      end

      # Called by llm.rb for each tool as it arrives during streaming.
      # Bookkeeping only — no execution, no threads, no queue pushes.
      def on_tool_call(tool, error)
        metadata = { id: tool.id, name: tool.name, arguments: tool.arguments }
        @pending_tool_calls.push(metadata)
        @pending_tools.push([tool, error])
      end

      # Drop only the recorded metadata (ToolUseGuard calls this once it has
      # used the data for synthetic message injection).
      def clear_pending_tool_calls!
        @pending_tool_calls.clear
      end

      # Drop the deferred execution pairs once the agent loop has consumed
      # and dispatched every tool call.
      def clear_pending_tools!
        @pending_tools.clear
      end
    end
  end
end
69
+
70
# Inline specs for Brute::Loop::AgentStream: tool-call bookkeeping and
# forwarding of content/reasoning chunks to the host callbacks.
test do
  # Minimal stand-in exposing the three readers on_tool_call uses.
  FakeTool = Struct.new(:id, :name, :arguments, keyword_init: true)

  it "records tool in pending_tools" do
    stream = Brute::Loop::AgentStream.new
    tool = FakeTool.new(id: "toolu_1", name: "read", arguments: {})
    stream.on_tool_call(tool, nil)
    stream.pending_tools.size.should == 1
  end

  it "records tool call metadata" do
    stream = Brute::Loop::AgentStream.new
    tool = FakeTool.new(id: "toolu_abc", name: "read", arguments: { "file_path" => "test.rb" })
    stream.on_tool_call(tool, nil)
    stream.pending_tool_calls.first[:id].should == "toolu_abc"
  end

  it "records multiple tool calls" do
    stream = Brute::Loop::AgentStream.new
    t1 = FakeTool.new(id: "toolu_1", name: "read", arguments: {})
    t2 = FakeTool.new(id: "toolu_2", name: "write", arguments: {})
    stream.on_tool_call(t1, nil)
    stream.on_tool_call(t2, nil)
    stream.pending_tool_calls.size.should == 2
  end

  it "clears pending tool calls and tools" do
    stream = Brute::Loop::AgentStream.new
    tool = FakeTool.new(id: "toolu_1", name: "read", arguments: {})
    stream.on_tool_call(tool, nil)
    stream.clear_pending_tool_calls!
    stream.clear_pending_tools!
    stream.pending_tool_calls.should.be.empty
    # The spec name promises both collections are cleared, so verify the
    # deferred execution pairs as well as the metadata.
    stream.pending_tools.should.be.empty
  end

  it "fires the content callback" do
    received = nil
    stream = Brute::Loop::AgentStream.new(on_content: ->(text) { received = text })
    stream.on_content("hello")
    received.should == "hello"
  end

  it "fires the reasoning callback" do
    received = nil
    stream = Brute::Loop::AgentStream.new(on_reasoning: ->(text) { received = text })
    stream.on_reasoning_content("thinking...")
    received.should == "thinking..."
  end
end
data/lib/brute/loop/agent_turn.rb ADDED
@@ -0,0 +1,520 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/setup"
4
+ require "brute"
5
+
6
module Brute
  module Loop
    # Factory + namespace for provider-specific agent turns.
    #
    # An agent turn sends a message to the LLM, iterates over tool calls
    # until there are none left, and returns the response. Each turn has
    # its own job queue for tool execution (ParallelQueue of ToolCallSteps).
    #
    # Usage:
    #
    #   step = AgentTurn.perform(agent:, session:, pipeline:, input:)
    #
    # AgentTurn.perform detects the provider from the agent and returns
    # the appropriate provider-specific Step subclass, already executed.
    # The returned step has .state, .result, .error, etc.
    #
    # Provider-specific subclasses live under AgentTurn:: and override
    # supported_messages to filter the session's message history per
    # provider capability.
    #
    module AgentTurn
      # Build and return the right AgentTurn step for this agent's provider.
      # Does NOT execute it — call step.call(task) yourself, or enqueue it.
      def self.new(agent:, session:, pipeline:, input: nil, callbacks: {}, **rest)
        detect(agent.provider).new(
          agent: agent, session: session, pipeline: pipeline,
          input: input, callbacks: callbacks, **rest
        )
      end

      # Build, execute inside a Sync block, return the finished step.
      def self.perform(agent:, session:, pipeline:, input: nil, callbacks: {}, **rest)
        step = new(agent: agent, session: session, pipeline: pipeline, input: input, callbacks: callbacks, **rest)
        Sync { step.call(Async::Task.current) }
        step
      end

      # Detect the right subclass from the provider by looking for a known
      # vendor name in the provider's class name (case-insensitive).
      # Falls back to Base for a nil provider or an unrecognised class name.
      def self.detect(provider)
        return Base unless provider

        class_name = provider.class.name.to_s.downcase
        if class_name.include?("anthropic")
          Anthropic
        elsif class_name.include?("openai")
          OpenAI
        elsif class_name.include?("google") || class_name.include?("gemini")
          Google
        else
          Base
        end
      end

      # The default implementation. Works for any provider.
      # Provider-specific subclasses override supported_messages
      # and anything else that differs.
      #
      # LLM::Context is built fresh for each pipeline call by the LLMCall
      # middleware. The agent turn owns the conversation state via
      # env[:messages] (an Array<LLM::Message>).
      #
      # Supports two modes:
      #
      #   Non-streaming (default): text arrives after the LLM call completes,
      #   on_content fires post-hoc via LLMCall middleware, tool calls come
      #   from env[:pending_functions].
      #
      #   Streaming: enabled when on_content or on_reasoning callbacks are
      #   present. Text/reasoning fire incrementally via AgentStream. Tool
      #   calls are deferred during the stream and collected afterward from
      #   the stream's pending_tools.
      #
      # Callbacks:
      #
      #   on_content:         ->(text) {}             # text chunk (streaming) or full text (non-streaming)
      #   on_reasoning:       ->(text) {}             # reasoning/thinking chunk (streaming only)
      #   on_tool_call_start: ->(batch) {}            # [{name:, arguments:}, ...] before tool execution
      #   on_tool_result:     ->(name, r) {}          # per-tool, after each completes
      #   on_question:        ->(questions, queue) {} # interactive; push answers onto queue
      #
      class Base < Step
        # Upper bound on tool-call round trips within a single turn.
        MAX_ITERATIONS = 100

        attr_reader :agent, :session

        def initialize(agent:, session:, pipeline:, input: nil, callbacks: {}, **rest)
          super(**rest)
          @agent = agent
          @session = session
          @pipeline = pipeline
          @input = input
          @callbacks = callbacks

          # Create streaming bridge when content or reasoning callbacks are
          # present. The stream is passed into env so LLMCall can wire it
          # into each fresh LLM::Context.
          if @callbacks[:on_content] || @callbacks[:on_reasoning]
            @stream = AgentStream.new(
              on_content: @callbacks[:on_content],
              on_reasoning: @callbacks[:on_reasoning],
              on_question: @callbacks[:on_question],
            )
          end
        end

        # Run the turn: one initial pipeline call, then one further call per
        # batch of tool results until no tool calls are pending,
        # env[:should_exit] is set, or MAX_ITERATIONS is reached.
        #
        # Returns the response of the last pipeline call.
        def perform(task)
          env = build_env

          # First LLM call
          env[:input] = build_initial_input(@input)
          env[:tool_results] = nil
          response = @pipeline.call(env)

          iterations = 0
          while !env[:should_exit] &&
                (pending = collect_pending_tools(env)).any? &&
                iterations < MAX_ITERATIONS

            # Fire on_tool_call_start with the full batch
            @callbacks[:on_tool_call_start]&.call(
              pending.map { |fn, _| { name: fn.name, arguments: fn.arguments } }
            )

            # Partition: question tools run sequentially on this fiber,
            # all others run in parallel via the sub-queue.
            questions, others = pending.partition { |fn, _| fn.name == "question" }
            outcomes = execute_question_tools(questions) + execute_parallel_tools(others)

            # Feed results back to LLM
            env[:input] = outcomes.map(&:last)
            env[:tool_results] = outcomes.map do |tool_name, value|
              [tool_result_name(tool_name, value), result_value(value)]
            end
            response = @pipeline.call(env)

            # Re-create sub-queue for next iteration's tool calls
            @mutex.synchronize { @jobs = nil }
            iterations += 1
          end

          response
        end

        # Override in subclasses to filter message types per provider.
        # Default: all messages pass through.
        def supported_messages(messages)
          messages
        end

        private

        # Fresh environment hash handed to the pipeline for this turn.
        def build_env
          {
            provider: @agent.provider,
            model: @agent.model,
            input: nil,
            tools: @agent.tools,
            messages: [],
            stream: @stream,
            params: {},
            metadata: {},
            tool_results: nil,
            streaming: !!@stream,
            callbacks: @callbacks,
            should_exit: nil,
            pending_functions: [],
          }
        end

        # Prompt for the first LLM call: the agent's system prompt (if any)
        # followed by the user message (if any).
        def build_initial_input(user_message)
          sys = @agent.system_prompt
          LLM::Prompt.new(@agent.provider) do |p|
            p.system(sys) if sys
            p.user(user_message) if user_message
          end
        end

        # Collect pending tool calls from the stream (streaming mode) or
        # from env[:pending_functions] (set by LLMCall after each call).
        #
        # Returns [(function, error_or_nil), ...] pairs.
        # Clears the stream's deferred state after consumption.
        def collect_pending_tools(env)
          if @stream&.pending_tools&.any?
            @stream.pending_tools.dup.tap { @stream.clear_pending_tools! }
          elsif env[:pending_functions]&.any?
            env[:pending_functions].dup.tap { env[:pending_functions] = [] }.map { |fn| [fn, nil] }
          else
            []
          end
        end

        # Questions — sequential, blocking, on the current fiber.
        # Returns [function_name, value] pairs in input order.
        def execute_question_tools(questions)
          questions.map do |fn, err|
            value = err || ask_question(fn)
            emit_tool_result(err ? err.name : fn.name, value)
            [fn.name, value]
          end
        end

        # Call a question tool with the on_question callback exposed through
        # Thread.current, then put the previous value back so the callback
        # does not linger on the calling fiber after the tool returns.
        def ask_question(fn)
          previous = Thread.current[:on_question]
          Thread.current[:on_question] = @callbacks[:on_question]
          fn.call
        ensure
          Thread.current[:on_question] = previous
        end

        # Everything that is not a question. Pairs that already carry an
        # error (from the stream's on_tool_call) are reported as-is; the
        # rest run through the parallel sub-queue.
        # Returns [function_name, value] pairs: pre-existing errors first,
        # then executed tools in input order.
        def execute_parallel_tools(others)
          failed, executable = others.partition { |_, err| err }

          outcomes = failed.map do |fn, err|
            emit_tool_result(err.name, err)
            [fn.name, err]
          end
          return outcomes if executable.empty?

          tool_steps = executable.map { |fn, _| ToolCallStep.new(function: fn) }
          tool_steps.each { |step| jobs(type: Brute::Queue::ParallelQueue) << step }
          jobs.drain

          executed = tool_steps.map do |step|
            value = step.state == :completed ? step.result : step.error
            emit_tool_result(step.function.name, value)
            [step.function.name, value]
          end
          outcomes + executed
        end

        # Fire on_tool_result for one tool, if the callback is registered.
        def emit_tool_result(tool_name, value)
          @callbacks[:on_tool_result]&.call(tool_name, result_value(value))
        end

        # Name recorded in env[:tool_results] for one outcome. Uses the
        # value's own #name when it has one, otherwise the name of the
        # function that produced it (rather than "unknown"). Exceptions are
        # never asked for #name: NameError#name is the missing identifier,
        # not a tool name.
        def tool_result_name(tool_name, value)
          return tool_name if value.is_a?(Exception) || !value.respond_to?(:name)

          value.name
        end

        # Unwrap objects that expose #value; pass anything else through.
        def result_value(result)
          result.respond_to?(:value) ? result.value : result
        end
      end

      # Provider-specific subclasses. Override supported_messages
      # or loop behavior as needed.

      class Anthropic < Base
      end

      class OpenAI < Base
      end

      class Google < Base
      end
    end
  end
end
258
+
259
# Inline specs for Brute::Loop::AgentTurn: provider detection, the factory,
# basic execution, cancellation and the should_exit / tool-call loop.
test do
  require_relative "../../../spec/support/mock_provider"
  require_relative "../../../spec/support/mock_response"

  # Pipeline double that records every env[:input] it receives and replays
  # canned responses in order (repeating the last one when exhausted).
  class RecordingPipeline
    attr_reader :calls

    def initialize(responses: [])
      @responses = responses
      @calls = []
      @index = 0
    end

    def call(env)
      @calls << env[:input]
      reply = @responses[@index] || @responses.last
      @index += 1
      reply
    end
  end

  FakeResponse = Struct.new(:content)

  def make_agent(provider: MockProvider.new, tools: [])
    Brute::Agent.new(provider: provider, model: nil, tools: tools)
  end

  # A MockProvider whose #class reports an anonymous class with the given name.
  def provider_with_class_name(class_name)
    provider = MockProvider.new
    provider.define_singleton_method(:class) do
      Class.new.tap { |klass| klass.define_singleton_method(:name) { class_name } }
    end
    provider
  end

  # An un-executed turn with a fresh session and the input "hi".
  def build_turn(pipeline:, agent: make_agent)
    Brute::Loop::AgentTurn.new(
      agent: agent,
      session: Brute::Store::Session.new,
      pipeline: pipeline,
      input: "hi",
    )
  end

  # -- factory detection --

  it "detects Base for unknown providers" do
    Brute::Loop::AgentTurn.detect(MockProvider.new).should == Brute::Loop::AgentTurn::Base
  end

  it "detects Anthropic from provider class name" do
    detected = Brute::Loop::AgentTurn.detect(provider_with_class_name("LLM::Anthropic"))
    detected.should == Brute::Loop::AgentTurn::Anthropic
  end

  it "detects OpenAI from provider class name" do
    detected = Brute::Loop::AgentTurn.detect(provider_with_class_name("LLM::OpenAI"))
    detected.should == Brute::Loop::AgentTurn::OpenAI
  end

  it "detects Google from provider class name" do
    detected = Brute::Loop::AgentTurn.detect(provider_with_class_name("LLM::Google"))
    detected.should == Brute::Loop::AgentTurn::Google
  end

  # -- AgentTurn.new returns the right subclass --

  it "returns Base instance for unknown provider" do
    turn = build_turn(pipeline: RecordingPipeline.new(responses: []))
    turn.should.be.kind_of Brute::Loop::AgentTurn::Base
  end

  # -- basic turn execution --

  it "calls the pipeline" do
    Sync do
      pipeline = RecordingPipeline.new(responses: [FakeResponse.new("hello")])
      turn = build_turn(pipeline: pipeline)
      turn.call(Async::Task.current)
      pipeline.calls.size.should == 1
    end
  end

  it "returns the LLM response as result" do
    Sync do
      turn = build_turn(pipeline: RecordingPipeline.new(responses: [FakeResponse.new("world")]))
      turn.call(Async::Task.current)
      turn.result.content.should == "world"
    end
  end

  it "transitions to completed" do
    Sync do
      turn = build_turn(pipeline: RecordingPipeline.new(responses: [FakeResponse.new("ok")]))
      turn.call(Async::Task.current)
      turn.state.should == :completed
    end
  end

  # -- AgentTurn.perform convenience --

  it "perform returns a completed step" do
    finished = Brute::Loop::AgentTurn.perform(
      agent: make_agent,
      session: Brute::Store::Session.new,
      pipeline: RecordingPipeline.new(responses: [FakeResponse.new("done")]),
      input: "hi",
    )
    finished.state.should == :completed
  end

  # -- cancellation --

  it "is cancellable when pending" do
    turn = build_turn(
      agent: Brute::Agent.new(provider: nil, model: nil, tools: []),
      pipeline: RecordingPipeline.new(responses: []),
    )
    turn.cancel
    turn.state.should == :cancelled
  end

  # -- system prompt from agent --

  it "uses agent system_prompt" do
    Sync do
      prompted_agent = Brute::Agent.new(
        provider: MockProvider.new,
        model: nil,
        tools: [],
        system_prompt: "You are a test bot",
      )
      turn = build_turn(
        agent: prompted_agent,
        pipeline: RecordingPipeline.new(responses: [FakeResponse.new("ok")]),
      )
      turn.call(Async::Task.current)
      turn.state.should == :completed
    end
  end

  # -- should_exit loop break --

  # A mock function that satisfies ToolCallStep's interface.
  LoopTestFunction = Struct.new(:id, :name, :arguments, keyword_init: true) do
    def call; self; end
    def value; "tool_result"; end
  end

  # Pipeline that injects pending_functions and optionally sets should_exit.
  class ShouldExitPipeline
    attr_reader :call_count

    def initialize(exit_on_call: nil)
      @exit_on_call = exit_on_call
      @call_count = 0
      @fn = LoopTestFunction.new(id: "call_1", name: "test_tool", arguments: "{}")
    end

    def call(env)
      @call_count += 1

      # Always give pending functions so the loop would continue.
      env[:pending_functions] = [@fn]

      if @exit_on_call && @call_count >= @exit_on_call
        env[:should_exit] = {
          reason: "test_exit",
          message: "forced exit for test",
          source: "ShouldExitPipeline",
        }
      end

      FakeResponse.new("response #{@call_count}")
    end
  end

  it "breaks the loop when should_exit is set on the initial call" do
    Sync do
      pipeline = ShouldExitPipeline.new(exit_on_call: 1)
      turn = build_turn(pipeline: pipeline)
      turn.call(Async::Task.current)

      # Pipeline called once (initial call). The loop never entered
      # because should_exit was set before the while guard.
      pipeline.call_count.should == 1
      turn.state.should == :completed
    end
  end

  it "breaks the loop mid-iteration when should_exit is set" do
    Sync do
      # exit_on_call: 2 means the first call returns tools (loop enters),
      # the second call (inside the loop) sets should_exit.
      pipeline = ShouldExitPipeline.new(exit_on_call: 2)
      turn = build_turn(pipeline: pipeline)
      turn.call(Async::Task.current)

      # Two calls: initial + one loop iteration. The loop did not
      # continue to a third call because should_exit was set.
      pipeline.call_count.should == 2
      turn.state.should == :completed
    end
  end

  it "loops normally when should_exit is not set" do
    Sync do
      call_count = 0
      fn = LoopTestFunction.new(id: "call_1", name: "test_tool", arguments: "{}")

      # Hands out a tool call on the first three calls, none afterwards.
      pipeline_obj = Object.new
      pipeline_obj.define_singleton_method(:call_count) { call_count }
      pipeline_obj.define_singleton_method(:call) do |env|
        call_count += 1
        env[:pending_functions] = call_count <= 3 ? [fn] : []
        FakeResponse.new("response #{call_count}")
      end

      turn = build_turn(pipeline: pipeline_obj)
      turn.call(Async::Task.current)

      # Call 1 (initial) → pending_functions has fn → loop enters
      # Loop iter 1: execute tools, call pipeline (call 2) → still has fn → continues
      # Loop iter 2: execute tools, call pipeline (call 3) → still has fn → continues
      # Loop iter 3: execute tools, call pipeline (call 4) → empty → exits
      call_count.should == 4
      turn.state.should == :completed
    end
  end
end