brute 0.4.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64):
  1. checksums.yaml +4 -4
  2. data/lib/brute/agent.rb +14 -0
  3. data/lib/brute/diff.rb +18 -28
  4. data/lib/brute/loop/agent_stream.rb +118 -0
  5. data/lib/brute/loop/agent_turn.rb +520 -0
  6. data/lib/brute/{compactor.rb → loop/compactor.rb} +2 -0
  7. data/lib/brute/{doom_loop.rb → loop/doom_loop.rb} +2 -0
  8. data/lib/brute/loop/step.rb +332 -0
  9. data/lib/brute/loop/tool_call_step.rb +90 -0
  10. data/lib/brute/middleware/compaction_check.rb +60 -146
  11. data/lib/brute/middleware/doom_loop_detection.rb +95 -92
  12. data/lib/brute/middleware/llm_call.rb +78 -80
  13. data/lib/brute/middleware/message_tracking.rb +115 -162
  14. data/lib/brute/middleware/otel/span.rb +25 -106
  15. data/lib/brute/middleware/otel/token_usage.rb +29 -84
  16. data/lib/brute/middleware/otel/tool_calls.rb +23 -107
  17. data/lib/brute/middleware/otel/tool_results.rb +22 -86
  18. data/lib/brute/middleware/reasoning_normalizer.rb +78 -103
  19. data/lib/brute/middleware/retry.rb +95 -76
  20. data/lib/brute/middleware/session_persistence.rb +38 -37
  21. data/lib/brute/middleware/token_tracking.rb +64 -63
  22. data/lib/brute/middleware/tool_error_tracking.rb +108 -82
  23. data/lib/brute/middleware/tool_use_guard.rb +57 -90
  24. data/lib/brute/middleware/tracing.rb +53 -63
  25. data/lib/brute/middleware.rb +18 -0
  26. data/lib/brute/orchestrator/turn.rb +105 -0
  27. data/lib/brute/pipeline.rb +77 -133
  28. data/lib/brute/prompts/build_switch.rb +21 -25
  29. data/lib/brute/prompts/environment.rb +31 -35
  30. data/lib/brute/prompts/identity.rb +22 -29
  31. data/lib/brute/prompts/instructions.rb +15 -18
  32. data/lib/brute/prompts/max_steps.rb +18 -25
  33. data/lib/brute/prompts/plan_reminder.rb +18 -26
  34. data/lib/brute/prompts/skills.rb +8 -30
  35. data/lib/brute/prompts.rb +28 -0
  36. data/lib/brute/providers/ollama.rb +135 -0
  37. data/lib/brute/providers/shell.rb +2 -2
  38. data/lib/brute/providers/shell_response.rb +2 -2
  39. data/lib/brute/providers.rb +62 -0
  40. data/lib/brute/queue/base_queue.rb +222 -0
  41. data/lib/brute/{file_mutation_queue.rb → queue/file_mutation_queue.rb} +28 -26
  42. data/lib/brute/queue/parallel_queue.rb +66 -0
  43. data/lib/brute/queue/sequential_queue.rb +63 -0
  44. data/lib/brute/store/message_store.rb +362 -0
  45. data/lib/brute/store/session.rb +106 -0
  46. data/lib/brute/{snapshot_store.rb → store/snapshot_store.rb} +2 -0
  47. data/lib/brute/{todo_store.rb → store/todo_store.rb} +2 -0
  48. data/lib/brute/system_prompt.rb +81 -194
  49. data/lib/brute/tools/delegate.rb +46 -116
  50. data/lib/brute/tools/fs_patch.rb +36 -37
  51. data/lib/brute/tools/fs_remove.rb +2 -2
  52. data/lib/brute/tools/fs_undo.rb +2 -2
  53. data/lib/brute/tools/fs_write.rb +29 -41
  54. data/lib/brute/tools/todo_read.rb +1 -1
  55. data/lib/brute/tools/todo_write.rb +1 -1
  56. data/lib/brute/tools.rb +31 -0
  57. data/lib/brute/version.rb +1 -1
  58. data/lib/brute.rb +40 -204
  59. metadata +31 -20
  60. data/lib/brute/agent_stream.rb +0 -181
  61. data/lib/brute/hooks.rb +0 -84
  62. data/lib/brute/message_store.rb +0 -463
  63. data/lib/brute/orchestrator.rb +0 -550
  64. data/lib/brute/session.rb +0 -161
@@ -0,0 +1,520 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/setup"
4
+ require "brute"
5
+
6
+ module Brute
7
+ module Loop
8
+ # Factory + namespace for provider-specific agent turns.
9
+ #
10
+ # An agent turn sends a message to the LLM, iterates over tool calls
11
+ # until there are none left, and returns the response. Each turn has
12
+ # its own job queue for tool execution (ParallelQueue of ToolCallSteps).
13
+ #
14
+ # Usage:
15
+ #
16
+ # step = AgentTurn.perform(agent:, session:, pipeline:, input:)
17
+ #
18
+ # AgentTurn.perform detects the provider from the agent and returns
19
+ # the appropriate provider-specific Step subclass, already executed.
20
+ # The returned step has .state, .result, .error, etc.
21
+ #
22
+ # Provider-specific subclasses live under AgentTurn:: and override
23
+ # supported_messages to filter the session's message history per
24
+ # provider capability.
25
+ #
26
+ module AgentTurn
27
# Build and return the AgentTurn step class matching this agent's
# provider. Does NOT execute it — call step.call(task) yourself, or
# enqueue it on a job queue.
def self.new(agent:, session:, pipeline:, input: nil, callbacks: {}, **rest)
  detect(agent.provider).new(
    agent: agent,
    session: session,
    pipeline: pipeline,
    input: input,
    callbacks: callbacks,
    **rest,
  )
end
33
+
34
# Convenience wrapper: build the provider-specific step, run it to
# completion inside a Sync reactor, and hand back the finished step
# (inspect .state / .result / .error on it).
def self.perform(agent:, session:, pipeline:, input: nil, callbacks: {}, **rest)
  step = new(
    agent: agent,
    session: session,
    pipeline: pipeline,
    input: input,
    callbacks: callbacks,
    **rest,
  )
  Sync { step.call(Async::Task.current) }
  step
end
42
+
43
# Detect the right Step subclass for a provider by sniffing its class
# name. A nil provider (or any unrecognized class) maps to Base.
def self.detect(provider)
  return Base unless provider

  case provider.class.name.to_s.downcase
  when /anthropic/ then Anthropic
  when /openai/ then OpenAI
  when /google|gemini/ then Google
  else Base
  end
end
61
+
62
+ # The default implementation. Works for any provider.
63
+ # Provider-specific subclasses override supported_messages
64
+ # and anything else that differs.
65
+ #
66
+ # LLM::Context is built fresh for each pipeline call by the LLMCall
67
+ # middleware. The agent turn owns the conversation state via
68
+ # env[:messages] (an Array<LLM::Message>).
69
+ #
70
+ # Supports two modes:
71
+ #
72
+ # Non-streaming (default): text arrives after the LLM call completes,
73
+ # on_content fires post-hoc via LLMCall middleware, tool calls come
74
+ # from env[:pending_functions].
75
+ #
76
+ # Streaming: enabled when on_content or on_reasoning callbacks are
77
+ # present. Text/reasoning fire incrementally via AgentStream. Tool
78
+ # calls are deferred during the stream and collected afterward from
79
+ # the stream's pending_tools.
80
+ #
81
+ # Callbacks:
82
+ #
83
+ # on_content: ->(text) {} # text chunk (streaming) or full text (non-streaming)
84
+ # on_reasoning: ->(text) {} # reasoning/thinking chunk (streaming only)
85
+ # on_tool_call_start: ->(batch) {} # [{name:, arguments:}, ...] before tool execution
86
+ # on_tool_result: ->(name, r) {} # per-tool, after each completes
87
+ # on_question: ->(questions, queue) {} # interactive; push answers onto queue
88
+ #
89
+ class Base < Step
90
+ MAX_ITERATIONS = 100
91
+
92
+ attr_reader :agent, :session
93
+
94
# Set up one agent turn.
#
# agent:     provides provider/model/tools/system_prompt
# session:   conversation session (held for subclasses)
# pipeline:  middleware pipeline invoked for every LLM call
# input:     optional initial user message
# callbacks: see class docs (on_content, on_reasoning, ...)
def initialize(agent:, session:, pipeline:, input: nil, callbacks: {}, **rest)
  super(**rest)
  @agent = agent
  @session = session
  @pipeline = pipeline
  @input = input
  @callbacks = callbacks

  # Build the streaming bridge only when a consumer wants incremental
  # text or reasoning. LLMCall wires @stream into each fresh context.
  return unless @callbacks[:on_content] || @callbacks[:on_reasoning]

  @stream = AgentStream.new(
    on_content: @callbacks[:on_content],
    on_reasoning: @callbacks[:on_reasoning],
    on_question: @callbacks[:on_question],
  )
end
113
+
114
# Run one agent turn: an initial LLM call, then a tool loop that
# executes pending tool calls and feeds their results back until the
# model stops requesting tools, a middleware sets env[:should_exit],
# or the MAX_ITERATIONS safety cap is reached.
#
# @param task [Async::Task] the task this step runs under (Step contract)
# @return [Object] the final pipeline response
def perform(task)
  env = build_env

  # First LLM call
  env[:input] = build_initial_input(@input)
  env[:tool_results] = nil
  response = @pipeline.call(env)

  iterations = 0
  while !env[:should_exit] &&
      (pending = collect_pending_tools(env)).any? &&
      iterations < MAX_ITERATIONS

    # Fire on_tool_call_start with the full batch
    @callbacks[:on_tool_call_start]&.call(
      pending.map { |fn, _| { name: fn.name, arguments: fn.arguments } }
    )

    # Partition: question tools run sequentially on this fiber,
    # all others run in parallel via the sub-queue.
    questions, others = pending.partition { |fn, _| fn.name == "question" }

    results = []

    # Questions first — sequential, blocking. The on_question callback
    # is exposed fiber-locally ONLY for the duration of each call.
    questions.each do |fn, err|
      if err
        @callbacks[:on_tool_result]&.call(err.name, result_value(err))
        results << err
      else
        # FIX: previously the fiber-local was set and never cleared, so
        # on_question leaked into every later tool execution on this
        # fiber. Reset it in ensure so the exposure is scoped.
        Thread.current[:on_question] = @callbacks[:on_question]
        begin
          result = fn.call
        ensure
          Thread.current[:on_question] = nil
        end
        @callbacks[:on_tool_result]&.call(fn.name, result_value(result))
        results << result
      end
    end

    # Others — into the parallel queue
    if others.any?
      errors, executable = others.partition { |_, err| err }

      # Record pre-existing errors (from stream's on_tool_call)
      errors.each do |_, err|
        @callbacks[:on_tool_result]&.call(err.name, result_value(err))
        results << err
      end

      if executable.any?
        tool_steps = executable.map { |fn, _| ToolCallStep.new(function: fn) }
        tool_steps.each { |s| jobs(type: Brute::Queue::ParallelQueue) << s }
        jobs.drain

        tool_steps.each do |s|
          val = s.state == :completed ? s.result : s.error
          @callbacks[:on_tool_result]&.call(s.function.name, result_value(val))
          results << val
        end
      end
    end

    # Feed results back to LLM
    env[:input] = results
    env[:tool_results] = results.filter_map { |r|
      name = r.respond_to?(:name) ? r.name : "unknown"
      [name, result_value(r)]
    }
    response = @pipeline.call(env)

    # Re-create sub-queue for next iteration's tool calls
    @mutex.synchronize { @jobs = nil }
    iterations += 1
  end

  response
end
189
+
190
# Override in subclasses to filter message types per provider.
# Default: all messages pass through unchanged.
#
# @param messages [Array] session message history (per class docs,
#   Array<LLM::Message> — confirm against LLMCall middleware)
# @return [Array] the (possibly filtered) messages
def supported_messages(messages)
  messages
end
195
+
196
+ private
197
+
198
# Assemble the fresh middleware env hash for this turn. The pipeline
# mutates this hash in place (pending_functions, should_exit, ...).
def build_env
  {
    provider: @agent.provider,
    model: @agent.model,
    input: nil,
    tools: @agent.tools,
    messages: [],
    stream: @stream,
    params: {},
    metadata: {},
    tool_results: nil,
    # streaming mode is implied by the presence of the bridge
    streaming: !@stream.nil?,
    callbacks: @callbacks,
    should_exit: nil,
    pending_functions: [],
  }
end
215
+
216
# Build the opening prompt: the agent's system prompt (if any) plus the
# optional user message. Either part may be absent.
def build_initial_input(user_message)
  system_text = @agent.system_prompt
  LLM::Prompt.new(@agent.provider) do |prompt|
    prompt.system(system_text) if system_text
    prompt.user(user_message) if user_message
  end
end
223
+
224
# Collect pending tool calls from the stream (streaming mode) or from
# env[:pending_functions] (set by LLMCall after each call).
#
# Returns [(function, error_or_nil), ...] pairs and clears whichever
# source was consumed, so the next iteration starts empty.
def collect_pending_tools(env)
  streamed = @stream&.pending_tools
  if streamed&.any?
    drained = streamed.dup
    @stream.clear_pending_tools!
    drained
  elsif env[:pending_functions]&.any?
    functions = env[:pending_functions].dup
    env[:pending_functions] = []
    functions.map { |fn| [fn, nil] }
  else
    []
  end
end
238
+
239
# Unwrap a tool result: objects exposing #value (function results)
# yield their payload; anything else (e.g. errors) passes through.
def result_value(result)
  return result.value if result.respond_to?(:value)

  result
end
242
+ end
243
+
244
# Provider-specific subclasses. Override supported_messages
# or loop behavior as needed.
#
# All three are currently behavior-identical to Base; they exist as
# extension points (and as the targets of AgentTurn.detect).

class Anthropic < Base
end

class OpenAI < Base
end

class Google < Base
end
255
+ end
256
+ end
257
+ end
258
+
259
+ test do
260
+ require_relative "../../../spec/support/mock_provider"
261
+ require_relative "../../../spec/support/mock_response"
262
+
263
# Pipeline test double: records every env[:input] it receives and
# replays canned responses in order, repeating the final response once
# the list is exhausted.
class RecordingPipeline
  attr_reader :calls

  def initialize(responses: [])
    @responses = responses
    @calls = []
    @index = 0
  end

  # Record the input and hand back the next canned response.
  def call(env)
    @calls.push(env[:input])
    response = @responses[@index] || @responses.last
    @index += 1
    response
  end
end
278
+
279
# Minimal stand-in for an LLM response: only exposes #content.
FakeResponse = Struct.new(:content)

# Convenience: build a Brute::Agent with a mock provider and no tools
# unless overridden.
def make_agent(provider: MockProvider.new, tools: [])
  Brute::Agent.new(provider: provider, model: nil, tools: tools)
end
284
+
285
# -- factory detection --

it "detects Base for unknown providers" do
  Brute::Loop::AgentTurn.detect(MockProvider.new).should == Brute::Loop::AgentTurn::Base
end

it "detects Anthropic from provider class name" do
  provider = MockProvider.new
  # Singleton-override #class so .name reads "LLM::Anthropic";
  # detect only inspects the class-name string.
  def provider.class; Class.new { def self.name; "LLM::Anthropic"; end }; end
  Brute::Loop::AgentTurn.detect(provider).should == Brute::Loop::AgentTurn::Anthropic
end

it "detects OpenAI from provider class name" do
  provider = MockProvider.new
  def provider.class; Class.new { def self.name; "LLM::OpenAI"; end }; end
  Brute::Loop::AgentTurn.detect(provider).should == Brute::Loop::AgentTurn::OpenAI
end

it "detects Google from provider class name" do
  provider = MockProvider.new
  def provider.class; Class.new { def self.name; "LLM::Google"; end }; end
  Brute::Loop::AgentTurn.detect(provider).should == Brute::Loop::AgentTurn::Google
end
308
+
309
# -- AgentTurn.new returns the right subclass --

it "returns Base instance for unknown provider" do
  # MockProvider's class name matches no known provider, so the
  # factory should fall back to the Base turn.
  step = Brute::Loop::AgentTurn.new(
    agent: make_agent,
    session: Brute::Store::Session.new,
    pipeline: RecordingPipeline.new(responses: []),
    input: "hi",
  )
  step.should.be.kind_of Brute::Loop::AgentTurn::Base
end
320
+
321
# -- basic turn execution --

it "calls the pipeline" do
  Sync do
    pipeline = RecordingPipeline.new(responses: [FakeResponse.new("hello")])
    step = Brute::Loop::AgentTurn.new(
      agent: make_agent,
      session: Brute::Store::Session.new,
      pipeline: pipeline,
      input: "hi",
    )
    step.call(Async::Task.current)
    # No tool calls in the response, so exactly one LLM round-trip.
    pipeline.calls.size.should == 1
  end
end

it "returns the LLM response as result" do
  Sync do
    pipeline = RecordingPipeline.new(responses: [FakeResponse.new("world")])
    step = Brute::Loop::AgentTurn.new(
      agent: make_agent,
      session: Brute::Store::Session.new,
      pipeline: pipeline,
      input: "hi",
    )
    step.call(Async::Task.current)
    # Step#result surfaces perform's return value — the pipeline response.
    step.result.content.should == "world"
  end
end

it "transitions to completed" do
  Sync do
    pipeline = RecordingPipeline.new(responses: [FakeResponse.new("ok")])
    step = Brute::Loop::AgentTurn.new(
      agent: make_agent,
      session: Brute::Store::Session.new,
      pipeline: pipeline,
      input: "hi",
    )
    step.call(Async::Task.current)
    step.state.should == :completed
  end
end
364
+
365
# -- AgentTurn.perform convenience --

it "perform returns a completed step" do
  # perform wraps build + Sync + call; no reactor needed at the call site.
  pipeline = RecordingPipeline.new(responses: [FakeResponse.new("done")])
  step = Brute::Loop::AgentTurn.perform(
    agent: make_agent,
    session: Brute::Store::Session.new,
    pipeline: pipeline,
    input: "hi",
  )
  step.state.should == :completed
end
377
+
378
# -- cancellation --

it "is cancellable when pending" do
  # Never executed, so cancel should move it straight to :cancelled.
  step = Brute::Loop::AgentTurn.new(
    agent: Brute::Agent.new(provider: nil, model: nil, tools: []),
    session: Brute::Store::Session.new,
    pipeline: RecordingPipeline.new(responses: []),
    input: "hi",
  )
  step.cancel
  step.state.should == :cancelled
end
390
+
391
# -- system prompt from agent --

it "uses agent system_prompt" do
  Sync do
    agent = Brute::Agent.new(
      provider: MockProvider.new,
      model: nil,
      tools: [],
      system_prompt: "You are a test bot",
    )
    pipeline = RecordingPipeline.new(responses: [FakeResponse.new("ok")])
    step = Brute::Loop::AgentTurn.new(
      agent: agent,
      session: Brute::Store::Session.new,
      pipeline: pipeline,
      input: "hi",
    )
    step.call(Async::Task.current)
    # Only asserts a clean run; build_initial_input consumes the
    # system prompt internally.
    step.state.should == :completed
  end
end
412
+
413
# -- should_exit loop break --

# A mock function that satisfies ToolCallStep's interface.
LoopTestFunction = Struct.new(:id, :name, :arguments, keyword_init: true) do
  # "Executing" the tool just yields the function itself.
  def call
    self
  end

  # Fixed payload observed via result_value.
  def value
    "tool_result"
  end
end

# Pipeline double that always re-arms pending_functions (so the tool
# loop would spin forever) and flips should_exit on the Nth call.
class ShouldExitPipeline
  attr_reader :call_count

  def initialize(exit_on_call: nil)
    @exit_on_call = exit_on_call
    @call_count = 0
    @fn = LoopTestFunction.new(id: "call_1", name: "test_tool", arguments: "{}")
  end

  def call(env)
    @call_count += 1

    # Keep the loop hungry: there is always a pending tool call.
    env[:pending_functions] = [@fn]

    env[:should_exit] = exit_payload if @exit_on_call && @call_count >= @exit_on_call

    FakeResponse.new("response #{@call_count}")
  end

  private

  # The structured exit marker a middleware would normally produce.
  def exit_payload
    {
      reason: "test_exit",
      message: "forced exit for test",
      source: "ShouldExitPipeline",
    }
  end
end
448
+
449
it "breaks the loop when should_exit is set on the initial call" do
  Sync do
    pipeline = ShouldExitPipeline.new(exit_on_call: 1)
    step = Brute::Loop::AgentTurn.new(
      agent: make_agent,
      session: Brute::Store::Session.new,
      pipeline: pipeline,
      input: "hi",
    )
    step.call(Async::Task.current)

    # Pipeline called once (initial call). The loop never entered
    # because should_exit was set before the while guard.
    pipeline.call_count.should == 1
    step.state.should == :completed
  end
end

it "breaks the loop mid-iteration when should_exit is set" do
  Sync do
    # exit_on_call: 2 means the first call returns tools (loop enters),
    # the second call (inside the loop) sets should_exit.
    pipeline = ShouldExitPipeline.new(exit_on_call: 2)
    step = Brute::Loop::AgentTurn.new(
      agent: make_agent,
      session: Brute::Store::Session.new,
      pipeline: pipeline,
      input: "hi",
    )
    step.call(Async::Task.current)

    # Two calls: initial + one loop iteration. The loop did not
    # continue to a third call because should_exit was set.
    pipeline.call_count.should == 2
    step.state.should == :completed
  end
end

it "loops normally when should_exit is not set" do
  Sync do
    call_count = 0
    fn = LoopTestFunction.new(id: "call_1", name: "test_tool", arguments: "{}")

    # Ad-hoc pipeline: hands out a tool call for the first three calls,
    # then an empty batch so the loop exits naturally.
    pipeline_obj = Object.new
    pipeline_obj.define_singleton_method(:call_count) { call_count }
    pipeline_obj.define_singleton_method(:call) do |env|
      call_count += 1
      if call_count <= 3
        env[:pending_functions] = [fn]
      else
        env[:pending_functions] = []
      end
      FakeResponse.new("response #{call_count}")
    end

    step = Brute::Loop::AgentTurn.new(
      agent: make_agent,
      session: Brute::Store::Session.new,
      pipeline: pipeline_obj,
      input: "hi",
    )
    step.call(Async::Task.current)

    # Call 1 (initial) → pending_functions has fn → loop enters
    # Loop iter 1: execute tools, call pipeline (call 2) → still has fn → continues
    # Loop iter 2: execute tools, call pipeline (call 3) → still has fn → continues
    # Loop iter 3: execute tools, call pipeline (call 4) → empty → exits
    call_count.should == 4
    step.state.should == :completed
  end
end
520
+ end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Brute
4
+ module Loop
4
5
  # Context compaction service. When the conversation grows past configurable
5
6
  # thresholds, older messages are summarized into a condensed form and the
6
7
  # original messages are dropped, keeping the context window manageable.
@@ -102,4 +103,5 @@ module Brute
102
103
  res.content
103
104
  end
104
105
  end
106
+ end
105
107
  end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Brute
4
+ module Loop
4
5
  # Detects when the agent is stuck in a repeating pattern of tool calls.
5
6
  #
6
7
  # Two types of loops are detected:
@@ -81,4 +82,5 @@ module Brute
81
82
  count
82
83
  end
83
84
  end
85
+ end
84
86
  end