brute 0.4.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/lib/brute/agent.rb +14 -0
  3. data/lib/brute/diff.rb +24 -0
  4. data/lib/brute/loop/agent_stream.rb +118 -0
  5. data/lib/brute/loop/agent_turn.rb +520 -0
  6. data/lib/brute/{compactor.rb → loop/compactor.rb} +2 -0
  7. data/lib/brute/{doom_loop.rb → loop/doom_loop.rb} +2 -0
  8. data/lib/brute/loop/step.rb +332 -0
  9. data/lib/brute/loop/tool_call_step.rb +90 -0
  10. data/lib/brute/middleware/compaction_check.rb +70 -23
  11. data/lib/brute/middleware/doom_loop_detection.rb +110 -7
  12. data/lib/brute/middleware/llm_call.rb +88 -1
  13. data/lib/brute/middleware/message_tracking.rb +140 -10
  14. data/lib/brute/middleware/otel/span.rb +32 -2
  15. data/lib/brute/middleware/otel/token_usage.rb +38 -0
  16. data/lib/brute/middleware/otel/tool_calls.rb +30 -1
  17. data/lib/brute/middleware/otel/tool_results.rb +29 -1
  18. data/lib/brute/middleware/otel.rb +5 -0
  19. data/lib/brute/middleware/reasoning_normalizer.rb +94 -0
  20. data/lib/brute/middleware/retry.rb +113 -1
  21. data/lib/brute/middleware/session_persistence.rb +46 -3
  22. data/lib/brute/middleware/token_tracking.rb +78 -0
  23. data/lib/brute/middleware/tool_error_tracking.rb +128 -1
  24. data/lib/brute/middleware/tool_use_guard.rb +64 -28
  25. data/lib/brute/middleware/tracing.rb +63 -2
  26. data/lib/brute/middleware.rb +18 -0
  27. data/lib/brute/orchestrator/turn.rb +105 -0
  28. data/lib/brute/patches/buffer_nil_guard.rb +5 -0
  29. data/lib/brute/pipeline.rb +86 -7
  30. data/lib/brute/prompts/build_switch.rb +29 -0
  31. data/lib/brute/prompts/environment.rb +43 -0
  32. data/lib/brute/prompts/identity.rb +29 -0
  33. data/lib/brute/prompts/instructions.rb +21 -0
  34. data/lib/brute/prompts/max_steps.rb +25 -0
  35. data/lib/brute/prompts/plan_reminder.rb +25 -0
  36. data/lib/brute/prompts/skills.rb +13 -0
  37. data/lib/brute/prompts.rb +28 -0
  38. data/lib/brute/providers/ollama.rb +135 -0
  39. data/lib/brute/providers/opencode_go.rb +5 -0
  40. data/lib/brute/providers/opencode_zen.rb +7 -2
  41. data/lib/brute/providers/shell.rb +2 -2
  42. data/lib/brute/providers/shell_response.rb +7 -2
  43. data/lib/brute/providers.rb +62 -0
  44. data/lib/brute/queue/base_queue.rb +222 -0
  45. data/lib/brute/{file_mutation_queue.rb → queue/file_mutation_queue.rb} +28 -26
  46. data/lib/brute/queue/parallel_queue.rb +66 -0
  47. data/lib/brute/queue/sequential_queue.rb +63 -0
  48. data/lib/brute/{message_store.rb → store/message_store.rb} +155 -62
  49. data/lib/brute/store/session.rb +106 -0
  50. data/lib/brute/{snapshot_store.rb → store/snapshot_store.rb} +2 -0
  51. data/lib/brute/{todo_store.rb → store/todo_store.rb} +2 -0
  52. data/lib/brute/system_prompt.rb +101 -0
  53. data/lib/brute/tools/delegate.rb +59 -0
  54. data/lib/brute/tools/fs_patch.rb +54 -2
  55. data/lib/brute/tools/fs_read.rb +5 -0
  56. data/lib/brute/tools/fs_remove.rb +7 -2
  57. data/lib/brute/tools/fs_search.rb +5 -0
  58. data/lib/brute/tools/fs_undo.rb +7 -2
  59. data/lib/brute/tools/fs_write.rb +40 -2
  60. data/lib/brute/tools/net_fetch.rb +5 -0
  61. data/lib/brute/tools/question.rb +5 -0
  62. data/lib/brute/tools/shell.rb +5 -0
  63. data/lib/brute/tools/todo_read.rb +6 -1
  64. data/lib/brute/tools/todo_write.rb +6 -1
  65. data/lib/brute/tools.rb +31 -0
  66. data/lib/brute/version.rb +1 -1
  67. data/lib/brute.rb +40 -204
  68. metadata +31 -20
  69. data/lib/brute/agent_stream.rb +0 -63
  70. data/lib/brute/hooks.rb +0 -84
  71. data/lib/brute/orchestrator.rb +0 -391
  72. data/lib/brute/session.rb +0 -161
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Brute
4
+ module Loop
4
5
  # Context compaction service. When the conversation grows past configurable
5
6
  # thresholds, older messages are summarized into a condensed form and the
6
7
  # original messages are dropped, keeping the context window manageable.
@@ -102,4 +103,5 @@ module Brute
102
103
  res.content
103
104
  end
104
105
  end
106
+ end
105
107
  end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Brute
4
+ module Loop
4
5
  # Detects when the agent is stuck in a repeating pattern of tool calls.
5
6
  #
6
7
  # Two types of loops are detected:
@@ -81,4 +82,5 @@ module Brute
81
82
  count
82
83
  end
83
84
  end
85
+ end
84
86
  end
@@ -0,0 +1,332 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/setup"
4
+ require "brute"
5
+
6
+ require "securerandom"
7
+ require "async"
8
+
9
+ module Brute
10
+ module Loop
11
+ # A first-class work object with identity, state, result/error capture,
12
+ # optional sub-queue, and cancellation.
13
+ #
14
+ # Users subclass Step and override #perform(task). The framework calls
15
+ # #call(task) which owns the state machine — subclasses never touch
16
+ # state transitions directly.
17
+ #
18
+ # State machine:
19
+ #
20
+ # ┌──> completed
21
+ # │
22
+ # pending ──> running ──┤
23
+ # │ │
24
+ # │ ├──> failed
25
+ # │ │
26
+ # └──> cancelled └──> cancelled
27
+ #
28
+ # Three terminal states. Two non-terminal. Once terminal, stays terminal.
29
+ #
30
# A unit of work with its own identity, lifecycle state, captured
# result/error, optional sub-queue, and cancellation.
#
# Subclasses override #perform(task). Callers invoke #call(task), which owns
# every state transition; subclasses never set the state themselves.
#
# Lifecycle: pending -> running -> completed | failed | cancelled, plus
# pending -> cancelled. Only a pending step can start running, so a step that
# has already run (or is running) is never executed a second time.
class Step
  STATES = %i[pending running completed failed cancelled].freeze

  attr_reader :id

  # @param id [String, nil] explicit identifier; one is generated when nil
  # @param attributes [Hash] extra keyword data kept on the step as-is
  def initialize(id: nil, **attributes)
    @id = id || self.class.generate_id
    @attributes = attributes
    @state = :pending
    @result = nil
    @error = nil
    @task = nil
    @jobs = nil
    @mutex = Mutex.new
  end

  # Builds an id from the class name, the process id, the current thread's
  # object id and eight random hex characters.
  #
  # @return [String]
  def self.generate_id
    "#{name}-#{Process.pid}-#{Thread.current.object_id}-#{SecureRandom.hex(4)}"
  end

  # Runs the step. Subclasses override #perform instead of this method.
  #
  # Does nothing unless the step is still pending. Errors that `rescue =>`
  # catches (StandardError) are recorded on the step and not re-raised;
  # Async::Cancel is recorded as a cancellation and re-raised.
  #
  # @param task [Object] the task this step runs under; passed to #perform
  # @return [nil]
  def call(task)
    return unless transition_to_running(task)

    begin
      outcome = perform(task)
      finish(:completed) { @result = outcome }
    rescue Async::Cancel
      # Cascade to the sub-queue before re-raising.
      @jobs&.cancel
      finish(:cancelled)
      raise
    rescue => error
      # Continue-on-failure: keep the error on the step, do NOT re-raise.
      finish(:failed) { @error = error }
    end

    nil
  end

  # Subclasses override this. The default raises a RuntimeError, which #call
  # records as a failure.
  def perform(task)
    raise "#{self.class}#perform not implemented"
  end

  # Lazily creates the sub-queue, parented to the task passed to #call.
  # Only valid while the step is running (i.e. from inside #perform).
  #
  # @param type [Class] queue class; instantiated with `parent:` and started
  # @raise [RuntimeError] when the step has no running task
  def jobs(type: Brute::Queue::SequentialQueue)
    @mutex.synchronize do
      raise "Step not running; sub-queue has nothing to parent to" unless @task
      @jobs ||= type.new(parent: @task).start
    end
  end

  # @return [Symbol] one of STATES
  def state
    @mutex.synchronize { @state }
  end

  # @return [Object, nil] value returned by #perform, once completed
  def result
    @mutex.synchronize { @result }
  end

  # @return [StandardError, nil] error raised by #perform, once failed
  def error
    @mutex.synchronize { @error }
  end

  # @return [Hash] snapshot with :id, :state, :result and :error
  def status
    @mutex.synchronize do
      { id: @id, state: @state, result: @result, error: @error }
    end
  end

  # Cancels the step. A pending step is marked cancelled directly; a running
  # step has its task cancelled.
  #
  # @return [Boolean] false when the step had already finished, true otherwise
  def cancel
    task = @mutex.synchronize do
      case @state
      when :pending
        @state = :cancelled
        nil
      when :running
        @task
      else
        return false # already finished
      end
    end

    # Cancel outside the lock; only a running step has a task here.
    task&.cancel
    true
  end

  private

  # Moves a pending step to :running and remembers its task.
  #
  # @return [Boolean] false when the step is not pending (cancelled, already
  #   running, or already finished), in which case nothing is changed
  def transition_to_running(task)
    @mutex.synchronize do
      return false unless @state == :pending
      @state = :running
      @task = task
      true
    end
  end

  # Records the final state under the lock and drops the task reference.
  # The optional block runs inside the lock to store the result or error.
  def finish(state)
    @mutex.synchronize do
      yield if block_given?
      @state = state
      @task = nil
    end
  end
end
141
+ end
142
+ end
143
+
144
# Examples for Brute::Loop::Step: identity, initial state, success, failure,
# cancellation, status, the un-overridden base #perform, sub-queue access,
# attribute storage, and nested sub-queues.
test do
  # Fixture: completes immediately with a fixed value.
  class HelloStep < Brute::Loop::Step
    def perform(task)
      "hello"
    end
  end

  # Fixture: always raises, to exercise the failure path.
  class FailStep < Brute::Loop::Step
    def perform(task)
      raise "boom"
    end
  end

  # Fixture: sleeps before returning. No example in this block uses it.
  class SlowStep < Brute::Loop::Step
    def perform(task)
      sleep 10
      "done"
    end
  end

  # -- identity --

  it "generates a string id" do
    HelloStep.new.id.should.be.kind_of String
  end

  it "generates a unique id" do
    # Two freshly built steps must not share a generated id.
    [HelloStep.new.id, HelloStep.new.id].uniq.size.should == 2
  end

  it "accepts a custom id" do
    HelloStep.new(id: "custom-1").id.should == "custom-1"
  end

  # -- initial state --

  it "starts in pending state" do
    HelloStep.new.state.should == :pending
  end

  it "starts with nil result" do
    HelloStep.new.result.should.be.nil
  end

  it "starts with nil error" do
    HelloStep.new.error.should.be.nil
  end

  # -- successful execution --

  it "transitions to completed on success" do
    Sync do
      step = HelloStep.new
      step.call(Async::Task.current)
      step.state.should == :completed
    end
  end

  it "captures the return value as result" do
    Sync do
      step = HelloStep.new
      step.call(Async::Task.current)
      step.result.should == "hello"
    end
  end

  # -- failed execution --

  it "transitions to failed on error" do
    Sync do
      step = FailStep.new
      step.call(Async::Task.current)
      step.state.should == :failed
    end
  end

  it "captures the exception as error" do
    Sync do
      step = FailStep.new
      step.call(Async::Task.current)
      step.error.message.should == "boom"
    end
  end

  it "does not re-raise on failure" do
    Sync do
      step = FailStep.new
      lambda { step.call(Async::Task.current) }.should.not.raise
    end
  end

  # -- cancellation of pending step --

  it "cancel returns true for pending step" do
    HelloStep.new.cancel.should.be.true
  end

  it "transitions pending step to cancelled" do
    step = HelloStep.new
    step.cancel
    step.state.should == :cancelled
  end

  it "skips perform when cancelled before call" do
    Sync do
      step = HelloStep.new
      step.cancel
      step.call(Async::Task.current)
      step.result.should.be.nil
    end
  end

  # -- cancellation of finished step --

  it "cancel returns false for completed step" do
    Sync do
      step = HelloStep.new
      step.call(Async::Task.current)
      step.cancel.should.be.false
    end
  end

  it "cancel returns false for failed step" do
    Sync do
      step = FailStep.new
      step.call(Async::Task.current)
      step.cancel.should.be.false
    end
  end

  # -- status --

  it "status includes id" do
    step = HelloStep.new(id: "s1")
    step.status[:id].should == "s1"
  end

  it "status includes state" do
    step = HelloStep.new
    step.status[:state].should == :pending
  end

  # -- perform not implemented --

  # The base #perform raises a plain RuntimeError, which #call records as a
  # failure instead of propagating, so the observable outcome is the state.
  it "marks the base Step as failed when #perform is not overridden" do
    Sync do
      step = Brute::Loop::Step.new
      step.call(Async::Task.current)
      step.state.should == :failed
    end
  end

  # -- jobs raises when not running --

  it "raises when accessing jobs outside perform" do
    lambda { HelloStep.new.jobs(type: Array) }.should.raise(RuntimeError)
  end

  # -- attributes stored --

  it "stores attributes" do
    step = HelloStep.new(url: "https://example.com")
    step.instance_variable_get(:@attributes)[:url].should == "https://example.com"
  end

  # -- nested sub-queue --

  describe "nesting" do
    # Fixture: enqueues three HelloSteps on its sub-queue, drains it, and
    # returns the children's results.
    class ParentStep < Brute::Loop::Step
      def perform(task)
        3.times { |i| jobs(type: Brute::Queue::SequentialQueue) << HelloStep.new(id: "child-#{i}") }
        jobs.drain
        jobs.steps.map(&:result)
      end
    end

    it "creates a sub-queue inside perform" do
      Sync do
        step = ParentStep.new
        step.call(Async::Task.current)
        step.result.should == ["hello", "hello", "hello"]
      end
    end

    it "sub-steps all complete" do
      Sync do
        step = ParentStep.new
        step.call(Async::Task.current)
        step.instance_variable_get(:@jobs).steps.all? { |s| s.state == :completed }.should.be.true
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/setup"
4
+ require "brute"
5
+
6
+ module Brute
7
+ module Loop
8
+ # A Step that wraps an LLM::Function tool call.
9
+ #
10
+ # Identity comes from the function's call ID so tool results
11
+ # can be correlated back to the LLM's request.
12
+ #
13
# Step adapter around a single LLM::Function tool call.
#
# The step's id is taken from the function's id, so a tool result can be
# matched back to the request that produced it. Remaining keyword arguments
# are forwarded to Step unchanged.
class ToolCallStep < Step
  attr_reader :function

  # @param function [#id, #call] the tool call to wrap
  # @param rest [Hash] extra keyword arguments passed through to Step
  def initialize(function:, **rest)
    @function = function
    super(id: function.id, **rest)
  end

  # Invokes the wrapped function; its return value is the step's outcome.
  def perform(task)
    function.call
  end
end
25
+ end
26
+ end
27
+
28
# Examples for Brute::Loop::ToolCallStep, driven by two stand-in functions.
test do
  # Stand-in function whose #call hands back a canned value.
  FakeFunction = Struct.new(:id, :name, :arguments, :return_value) do
    def call
      self[:return_value]
    end
  end

  # Stand-in function whose #call always raises.
  class FailFunction
    attr_reader :id, :name, :arguments

    def initialize
      @id, @name, @arguments = "fail_1", "fail", {}
    end

    def call
      raise "tool exploded"
    end
  end

  # Wraps a function in a step without running it.
  wrap = ->(fn) { Brute::Loop::ToolCallStep.new(function: fn) }

  # Wraps a function, runs the step on the current task, and returns the step.
  # Must be invoked from inside a Sync block.
  executed = lambda do |fn|
    wrap.call(fn).tap { |step| step.call(Async::Task.current) }
  end

  it "uses function id as step id" do
    wrap.call(FakeFunction.new("call_123", "read", {}, "content")).id.should == "call_123"
  end

  it "calls the function in perform" do
    Sync do
      executed.call(FakeFunction.new("call_1", "read", {}, "file contents")).result.should == "file contents"
    end
  end

  it "transitions to completed on success" do
    Sync do
      executed.call(FakeFunction.new("call_2", "write", {}, "ok")).state.should == :completed
    end
  end

  it "captures function as accessor" do
    fn = FakeFunction.new("call_3", "shell", {}, nil)
    wrap.call(fn).function.should.be.identical_to fn
  end

  it "transitions to failed when function raises" do
    Sync do
      executed.call(FailFunction.new).state.should == :failed
    end
  end

  it "captures function error" do
    Sync do
      executed.call(FailFunction.new).error.message.should == "tool exploded"
    end
  end
end
@@ -1,59 +1,106 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
4
+ require "brute"
5
+
3
6
  module Brute
4
7
  module Middleware
5
8
  # Checks context size after each LLM call and triggers compaction
6
9
  # when thresholds are exceeded.
7
10
  #
8
- # Runs POST-call: inspects message count and token usage from the
9
- # response. If compaction is needed, summarizes older messages and
10
- # rebuilds the context with the summary + recent messages.
11
+ # Runs POST-call: inspects message count and token usage. If compaction
12
+ # is needed, summarizes older messages and replaces env[:messages] with
13
+ # the summary so the next LLM call starts with a compact history.
11
14
  #
12
15
  class CompactionCheck < Base
13
- def initialize(app, compactor:, system_prompt:, tools:, stream: nil)
16
+ def initialize(app, compactor:, system_prompt:)
14
17
  super(app)
15
18
  @compactor = compactor
16
19
  @system_prompt = system_prompt
17
- @tools = tools
18
- @stream = stream
19
20
  end
20
21
 
21
22
  def call(env)
22
23
  response = @app.call(env)
23
24
 
24
- ctx = env[:context]
25
- messages = ctx.messages.to_a.compact
26
- usage = ctx.usage rescue nil
25
+ messages = env[:messages]
26
+ usage = env[:metadata].dig(:tokens, :last_call)
27
27
 
28
28
  if @compactor.should_compact?(messages, usage: usage)
29
29
  result = @compactor.compact(messages)
30
30
  if result
31
31
  summary_text, _recent = result
32
- rebuild_context!(env, summary_text)
33
32
  env[:metadata][:compaction] = {
34
33
  messages_before: messages.size,
35
34
  timestamp: Time.now.iso8601,
36
35
  }
36
+ # Replace the message history with the summary
37
+ env[:messages] = [
38
+ LLM::Message.new(:system, @system_prompt),
39
+ LLM::Message.new(:user, "[Previous conversation summary]\n\n#{summary_text}"),
40
+ ]
37
41
  end
38
42
  end
39
43
 
40
44
  response
41
45
  end
46
+ end
47
+ end
48
+ end
42
49
 
43
- private
50
+ test do
51
+ require_relative "../../../spec/support/mock_provider"
52
+ require_relative "../../../spec/support/mock_response"
44
53
 
45
- def rebuild_context!(env, summary_text)
46
- provider = env[:provider]
47
- ctx_opts = { tools: @tools }
48
- ctx_opts[:stream] = @stream if @stream
49
- new_ctx = LLM::Context.new(provider, **ctx_opts)
50
- prompt = new_ctx.prompt do |p|
51
- p.system @system_prompt
52
- p.user "[Previous conversation summary]\n\n#{summary_text}"
53
- end
54
- new_ctx.talk(prompt)
55
- env[:context] = new_ctx
56
- end
54
+ def build_env(**overrides)
55
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
56
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
57
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
58
+ end
59
+
60
+ def make_compactor(should: false, result: nil)
61
+ Object.new.tap do |c|
62
+ c.define_singleton_method(:should_compact?) { |_msgs, **_| should }
63
+ c.define_singleton_method(:compact) { |_msgs| result }
57
64
  end
58
65
  end
66
+
67
+ it "passes the response through when compaction is not needed" do
68
+ response = MockResponse.new(content: "compaction response")
69
+ compactor = make_compactor(should: false)
70
+ middleware = Brute::Middleware::CompactionCheck.new(->(_env) { response }, compactor: compactor, system_prompt: "sys")
71
+ result = middleware.call(build_env)
72
+ result.should == response
73
+ end
74
+
75
+ it "does not set compaction metadata when not needed" do
76
+ compactor = make_compactor(should: false)
77
+ middleware = Brute::Middleware::CompactionCheck.new(->(_env) { MockResponse.new }, compactor: compactor, system_prompt: "sys")
78
+ env = build_env
79
+ middleware.call(env)
80
+ env[:metadata][:compaction].should.be.nil
81
+ end
82
+
83
+ it "replaces messages with summary when compaction triggers" do
84
+ compactor = make_compactor(should: true, result: ["Summary of conversation", []])
85
+ middleware = Brute::Middleware::CompactionCheck.new(->(_env) { MockResponse.new }, compactor: compactor, system_prompt: "sys")
86
+ env = build_env(messages: [LLM::Message.new(:user, "hello"), LLM::Message.new(:assistant, "hi"), LLM::Message.new(:user, "how")])
87
+ middleware.call(env)
88
+ env[:metadata][:compaction][:messages_before].should == 3
89
+ end
90
+
91
+ it "creates two messages after compaction" do
92
+ compactor = make_compactor(should: true, result: ["Summary", []])
93
+ middleware = Brute::Middleware::CompactionCheck.new(->(_env) { MockResponse.new }, compactor: compactor, system_prompt: "sys")
94
+ env = build_env(messages: [LLM::Message.new(:user, "hello")])
95
+ middleware.call(env)
96
+ env[:messages].size.should == 2
97
+ end
98
+
99
+ it "handles compactor returning nil gracefully" do
100
+ compactor = make_compactor(should: true, result: nil)
101
+ middleware = Brute::Middleware::CompactionCheck.new(->(_env) { MockResponse.new }, compactor: compactor, system_prompt: "sys")
102
+ env = build_env(messages: [LLM::Message.new(:user, "hello")])
103
+ middleware.call(env)
104
+ env[:metadata][:compaction].should.be.nil
105
+ end
59
106
  end