brute 0.4.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64):
  1. checksums.yaml +4 -4
  2. data/lib/brute/agent.rb +14 -0
  3. data/lib/brute/diff.rb +18 -28
  4. data/lib/brute/loop/agent_stream.rb +118 -0
  5. data/lib/brute/loop/agent_turn.rb +520 -0
  6. data/lib/brute/{compactor.rb → loop/compactor.rb} +2 -0
  7. data/lib/brute/{doom_loop.rb → loop/doom_loop.rb} +2 -0
  8. data/lib/brute/loop/step.rb +332 -0
  9. data/lib/brute/loop/tool_call_step.rb +90 -0
  10. data/lib/brute/middleware/compaction_check.rb +60 -146
  11. data/lib/brute/middleware/doom_loop_detection.rb +95 -92
  12. data/lib/brute/middleware/llm_call.rb +78 -80
  13. data/lib/brute/middleware/message_tracking.rb +115 -162
  14. data/lib/brute/middleware/otel/span.rb +25 -106
  15. data/lib/brute/middleware/otel/token_usage.rb +29 -84
  16. data/lib/brute/middleware/otel/tool_calls.rb +23 -107
  17. data/lib/brute/middleware/otel/tool_results.rb +22 -86
  18. data/lib/brute/middleware/reasoning_normalizer.rb +78 -103
  19. data/lib/brute/middleware/retry.rb +95 -76
  20. data/lib/brute/middleware/session_persistence.rb +38 -37
  21. data/lib/brute/middleware/token_tracking.rb +64 -63
  22. data/lib/brute/middleware/tool_error_tracking.rb +108 -82
  23. data/lib/brute/middleware/tool_use_guard.rb +57 -90
  24. data/lib/brute/middleware/tracing.rb +53 -63
  25. data/lib/brute/middleware.rb +18 -0
  26. data/lib/brute/orchestrator/turn.rb +105 -0
  27. data/lib/brute/pipeline.rb +77 -133
  28. data/lib/brute/prompts/build_switch.rb +21 -25
  29. data/lib/brute/prompts/environment.rb +31 -35
  30. data/lib/brute/prompts/identity.rb +22 -29
  31. data/lib/brute/prompts/instructions.rb +15 -18
  32. data/lib/brute/prompts/max_steps.rb +18 -25
  33. data/lib/brute/prompts/plan_reminder.rb +18 -26
  34. data/lib/brute/prompts/skills.rb +8 -30
  35. data/lib/brute/prompts.rb +28 -0
  36. data/lib/brute/providers/ollama.rb +135 -0
  37. data/lib/brute/providers/shell.rb +2 -2
  38. data/lib/brute/providers/shell_response.rb +2 -2
  39. data/lib/brute/providers.rb +62 -0
  40. data/lib/brute/queue/base_queue.rb +222 -0
  41. data/lib/brute/{file_mutation_queue.rb → queue/file_mutation_queue.rb} +28 -26
  42. data/lib/brute/queue/parallel_queue.rb +66 -0
  43. data/lib/brute/queue/sequential_queue.rb +63 -0
  44. data/lib/brute/store/message_store.rb +362 -0
  45. data/lib/brute/store/session.rb +106 -0
  46. data/lib/brute/{snapshot_store.rb → store/snapshot_store.rb} +2 -0
  47. data/lib/brute/{todo_store.rb → store/todo_store.rb} +2 -0
  48. data/lib/brute/system_prompt.rb +81 -194
  49. data/lib/brute/tools/delegate.rb +46 -116
  50. data/lib/brute/tools/fs_patch.rb +36 -37
  51. data/lib/brute/tools/fs_remove.rb +2 -2
  52. data/lib/brute/tools/fs_undo.rb +2 -2
  53. data/lib/brute/tools/fs_write.rb +29 -41
  54. data/lib/brute/tools/todo_read.rb +1 -1
  55. data/lib/brute/tools/todo_write.rb +1 -1
  56. data/lib/brute/tools.rb +31 -0
  57. data/lib/brute/version.rb +1 -1
  58. data/lib/brute.rb +40 -204
  59. metadata +31 -20
  60. data/lib/brute/agent_stream.rb +0 -181
  61. data/lib/brute/hooks.rb +0 -84
  62. data/lib/brute/message_store.rb +0 -463
  63. data/lib/brute/orchestrator.rb +0 -550
  64. data/lib/brute/session.rb +0 -161
@@ -1,34 +1,39 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- if __FILE__ == $0
4
- require "bundler/setup"
5
- require "brute"
6
- end
3
+ require "bundler/setup"
4
+ require "brute"
7
5
 
8
6
  module Brute
9
7
  module Middleware
10
8
  # Detects when the agent is stuck repeating tool call patterns and injects
11
- # a corrective warning into the context before the next LLM call.
9
+ # a corrective warning into the message history before the next LLM call.
12
10
  #
13
11
  # Runs PRE-call: inspects the conversation history for repeating tool call
14
- # patterns. If detected, talks a warning message into the context so the
15
- # LLM sees it as input alongside the normal tool results.
12
+ # patterns. If detected, appends a warning message so the LLM sees it as
13
+ # input alongside the normal tool results.
16
14
  #
17
15
  class DoomLoopDetection < Base
18
16
  def initialize(app, threshold: 3)
19
17
  super(app)
20
- @detector = Brute::DoomLoopDetector.new(threshold: threshold)
18
+ @detector = Brute::Loop::DoomLoopDetector.new(threshold: threshold)
21
19
  end
22
20
 
23
21
  def call(env)
24
- ctx = env[:context]
25
- messages = ctx.messages.to_a
22
+ messages = env[:messages]
26
23
 
27
24
  if (reps = @detector.detect(messages))
28
25
  warning = @detector.warning_message(reps)
29
26
  # Inject the warning as a user message so the LLM sees it
30
- ctx.talk(warning)
27
+ env[:messages] << LLM::Message.new(:user, warning)
31
28
  env[:metadata][:doom_loop_detected] = reps
29
+
30
+ # Signal the agent loop to exit after this LLM call completes.
31
+ # First-writer-wins: don't overwrite if another middleware already set it.
32
+ env[:should_exit] ||= {
33
+ reason: "doom_loop_detected",
34
+ message: "Agent is stuck repeating the same tool calls (#{reps} repetitions).",
35
+ source: "DoomLoopDetection",
36
+ }
32
37
  end
33
38
 
34
39
  @app.call(env)
@@ -37,97 +42,95 @@ module Brute
37
42
  end
38
43
  end
39
44
 
40
- if __FILE__ == $0
41
- require_relative "../../../spec/spec_helper"
42
-
43
- RSpec.describe Brute::Middleware::DoomLoopDetection do
44
- let(:response) { MockResponse.new(content: "loop check") }
45
- let(:inner_app) { ->(_env) { response } }
46
-
47
- # Build a fake assistant message whose .functions returns the given list.
48
- def assistant_msg_with_functions(function_list)
49
- msg = LLM::Message.new(:assistant, "tool msg", {})
50
- allow(msg).to receive(:functions).and_return(function_list)
51
- msg
52
- end
53
-
54
- def fake_function(name:, arguments:)
55
- double("fn", name: name, arguments: arguments)
56
- end
57
-
58
- it "passes through when no doom loop is detected" do
59
- middleware = described_class.new(inner_app, threshold: 3)
60
- env = build_env
61
-
62
- result = middleware.call(env)
63
-
64
- expect(result).to eq(response)
65
- expect(env[:metadata][:doom_loop_detected]).to be_nil
66
- end
67
-
68
- it "detects consecutive identical tool calls" do
69
- provider = MockProvider.new
70
- ctx = LLM::Context.new(provider, tools: [])
71
-
72
- fn = fake_function(name: "fs_read", arguments: '{"path":"x.rb"}')
73
- messages = 4.times.map { assistant_msg_with_functions([fn]) }
74
-
75
- allow(ctx).to receive(:messages).and_return(double("buffer", to_a: messages))
76
- allow(ctx).to receive(:talk)
77
-
78
- middleware = described_class.new(inner_app, threshold: 3)
79
- env = build_env(context: ctx, provider: provider)
80
-
81
- middleware.call(env)
82
-
83
- expect(env[:metadata][:doom_loop_detected]).not_to be_nil
84
- end
85
-
86
- it "detects repeating sequences [A,B,A,B,A,B]" do
87
- provider = MockProvider.new
88
- ctx = LLM::Context.new(provider, tools: [])
45
+ test do
46
+ require_relative "../../../spec/support/mock_provider"
47
+ require_relative "../../../spec/support/mock_response"
89
48
 
90
- fn_a = fake_function(name: "fs_read", arguments: '{"path":"a.rb"}')
91
- fn_b = fake_function(name: "shell", arguments: '{"cmd":"ls"}')
92
- messages = 3.times.flat_map do
93
- [assistant_msg_with_functions([fn_a]), assistant_msg_with_functions([fn_b])]
94
- end
95
-
96
- allow(ctx).to receive(:messages).and_return(double("buffer", to_a: messages))
97
- allow(ctx).to receive(:talk)
49
+ def build_env(**overrides)
50
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
51
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
52
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
53
+ end
98
54
 
99
- middleware = described_class.new(inner_app, threshold: 3)
100
- env = build_env(context: ctx, provider: provider)
55
+ FakeFunc = Struct.new(:name, :arguments, keyword_init: true)
101
56
 
102
- middleware.call(env)
57
+ def assistant_msg_with_functions(function_list)
58
+ msg = LLM::Message.new(:assistant, "tool msg", {})
59
+ msg.define_singleton_method(:functions) { function_list }
60
+ msg
61
+ end
103
62
 
104
- expect(env[:metadata][:doom_loop_detected]).not_to be_nil
105
- end
63
+ it "passes through when no doom loop is detected" do
64
+ inner_app = ->(_env) { MockResponse.new(content: "loop check") }
65
+ middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
66
+ env = build_env
67
+ middleware.call(env)
68
+ env[:metadata][:doom_loop_detected].should.be.nil
69
+ end
106
70
 
107
- it "does not trigger below the threshold" do
108
- provider = MockProvider.new
109
- ctx = LLM::Context.new(provider, tools: [])
71
+ it "detects consecutive identical tool calls" do
72
+ inner_app = ->(_env) { MockResponse.new(content: "loop check") }
73
+ fn = FakeFunc.new(name: "fs_read", arguments: '{"path":"x.rb"}')
74
+ messages = 4.times.map { assistant_msg_with_functions([fn]) }
75
+ middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
76
+ env = build_env(messages: messages)
77
+ middleware.call(env)
78
+ env[:metadata][:doom_loop_detected].should.not.be.nil
79
+ end
110
80
 
111
- fn = fake_function(name: "fs_read", arguments: '{"path":"x.rb"}')
112
- messages = 2.times.map { assistant_msg_with_functions([fn]) }
81
+ it "does not trigger below the threshold" do
82
+ inner_app = ->(_env) { MockResponse.new(content: "loop check") }
83
+ fn = FakeFunc.new(name: "fs_read", arguments: '{"path":"x.rb"}')
84
+ messages = 2.times.map { assistant_msg_with_functions([fn]) }
85
+ middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
86
+ env = build_env(messages: messages)
87
+ middleware.call(env)
88
+ env[:metadata][:doom_loop_detected].should.be.nil
89
+ end
113
90
 
114
- allow(ctx).to receive(:messages).and_return(double("buffer", to_a: messages))
91
+ it "sets should_exit reason when doom loop detected" do
92
+ inner_app = ->(_env) { MockResponse.new(content: "loop check") }
93
+ fn = FakeFunc.new(name: "fs_read", arguments: '{"path":"x.rb"}')
94
+ messages = 4.times.map { assistant_msg_with_functions([fn]) }
95
+ middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
96
+ env = build_env(messages: messages)
97
+ middleware.call(env)
98
+ env[:should_exit][:reason].should == "doom_loop_detected"
99
+ end
115
100
 
116
- middleware = described_class.new(inner_app, threshold: 3)
117
- env = build_env(context: ctx, provider: provider)
101
+ it "does not set should_exit when no loop detected" do
102
+ inner_app = ->(_env) { MockResponse.new(content: "loop check") }
103
+ middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
104
+ env = build_env
105
+ middleware.call(env)
106
+ env[:should_exit].should.be.nil
107
+ end
118
108
 
119
- middleware.call(env)
109
+ it "does not overwrite should_exit if already set" do
110
+ inner_app = ->(_env) { MockResponse.new(content: "loop check") }
111
+ fn = FakeFunc.new(name: "fs_read", arguments: '{"path":"x.rb"}')
112
+ messages = 4.times.map { assistant_msg_with_functions([fn]) }
113
+ middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
114
+ existing = { reason: "other", message: "earlier", source: "Other" }
115
+ env = build_env(messages: messages, should_exit: existing)
116
+ middleware.call(env)
117
+ env[:should_exit][:reason].should == "other"
118
+ end
120
119
 
121
- expect(env[:metadata][:doom_loop_detected]).to be_nil
122
- end
120
+ it "appends a warning message when loop detected" do
121
+ inner_app = ->(_env) { MockResponse.new(content: "loop check") }
122
+ fn = FakeFunc.new(name: "fs_read", arguments: '{"path":"x.rb"}')
123
+ messages = 4.times.map { assistant_msg_with_functions([fn]) }
124
+ middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
125
+ env = build_env(messages: messages)
126
+ original_count = env[:messages].size
127
+ middleware.call(env)
128
+ env[:messages].size.should == original_count + 1
129
+ end
123
130
 
124
- describe Brute::DoomLoopDetector do
125
- it "generates a warning message with repetition count" do
126
- detector = described_class.new(threshold: 3)
127
- msg = detector.warning_message(5)
128
- expect(msg).to include("Doom loop detected")
129
- expect(msg).to include("5 times")
130
- end
131
- end
131
+ it "generates warning message with repetition count" do
132
+ detector = Brute::Loop::DoomLoopDetector.new(threshold: 3)
133
+ msg = detector.warning_message(5)
134
+ msg.should =~ /5 times/
132
135
  end
133
136
  end
@@ -1,17 +1,35 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
4
+ require "brute"
5
+
3
6
  module Brute
4
7
  module Middleware
5
8
  # The terminal "app" in the pipeline — performs the actual LLM call.
6
9
  #
10
+ # Builds a fresh LLM::Context per call from env[:messages], makes the
11
+ # call, extracts new messages back into env[:messages], and stashes
12
+ # pending functions in env[:pending_functions].
13
+ #
7
14
  # When streaming, on_content fires incrementally via AgentStream.
8
15
  # When not streaming, fires on_content post-hoc with the full text.
9
16
  #
10
17
  class LLMCall
11
18
  def call(env)
12
- ctx = env[:context]
19
+ ctx = build_context(env)
20
+
21
+ # Load existing conversation history into the ephemeral context
22
+ ctx.messages.concat(env[:messages])
23
+
13
24
  response = ctx.talk(env[:input])
14
25
 
26
+ # Extract new messages appended by talk() and store them
27
+ new_messages = ctx.messages.to_a.drop(env[:messages].size)
28
+ env[:messages].concat(new_messages)
29
+
30
+ # Stash pending functions for the agent loop
31
+ env[:pending_functions] = ctx.functions.to_a
32
+
15
33
  # Only fire on_content post-hoc when NOT streaming
16
34
  # (streaming delivers chunks incrementally via AgentStream)
17
35
  unless env[:streaming]
@@ -26,6 +44,14 @@ module Brute
26
44
 
27
45
  private
28
46
 
47
+ def build_context(env)
48
+ params = {}
49
+ params[:tools] = env[:tools] if env[:tools]&.any?
50
+ params[:stream] = env[:stream] if env[:stream]
51
+ params[:model] = env[:model] if env[:model]
52
+ LLM::Context.new(env[:provider], **params)
53
+ end
54
+
29
55
  # Safely extract text content from an LLM response.
30
56
  # Returns nil when the response contains only tool calls (no assistant text),
31
57
  # which causes LLM::Contract::Completion#content to raise NoMethodError
@@ -40,91 +66,63 @@ module Brute
40
66
  end
41
67
  end
42
68
 
43
- if __FILE__ == $0
44
- require_relative "../../../spec/spec_helper"
45
-
46
- RSpec.describe Brute::Middleware::LLMCall do
47
- let(:provider) { MockProvider.new }
48
- let(:middleware) { described_class.new }
49
-
50
- it "calls ctx.talk with env[:input] and returns the response" do
51
- ctx = LLM::Context.new(provider, tools: [])
52
- prompt = ctx.prompt { |p| p.system("sys"); p.user("hello") }
53
- env = build_env(context: ctx, provider: provider, input: prompt, streaming: false)
54
-
55
- response = middleware.call(env)
56
-
57
- expect(response).not_to be_nil
58
- expect(provider.calls.size).to eq(1)
59
- end
60
-
61
- context "when not streaming" do
62
- it "fires on_content callback with the response text" do
63
- received_content = nil
64
- callback = ->(text) { received_content = text }
69
+ test do
70
+ require_relative "../../../spec/support/mock_provider"
71
+ require_relative "../../../spec/support/mock_response"
65
72
 
66
- response = MockResponse.new(content: "Hello world")
67
- allow(provider).to receive(:complete).and_return(response)
68
-
69
- ctx = LLM::Context.new(provider, tools: [])
70
- prompt = ctx.prompt { |p| p.system("sys"); p.user("hi") }
71
- env = build_env(
72
- context: ctx,
73
- provider: provider,
74
- input: prompt,
75
- streaming: false,
76
- callbacks: { on_content: callback }
77
- )
78
-
79
- middleware.call(env)
73
+ def build_env(**overrides)
74
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
75
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
76
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
77
+ end
80
78
 
81
- expect(received_content).to eq("Hello world")
82
- end
83
- end
79
+ it "calls the provider and returns a response" do
80
+ provider = MockProvider.new
81
+ middleware = Brute::Middleware::LLMCall.new
82
+ env = build_env(provider: provider, input: "hello", streaming: false)
83
+ response = middleware.call(env)
84
+ response.should.not.be.nil
85
+ end
84
86
 
85
- context "when streaming" do
86
- it "does not fire on_content callback" do
87
- callback_called = false
88
- callback = ->(_text) { callback_called = true }
87
+ it "records a call on the provider" do
88
+ provider = MockProvider.new
89
+ middleware = Brute::Middleware::LLMCall.new
90
+ env = build_env(provider: provider, input: "hello", streaming: false)
91
+ middleware.call(env)
92
+ provider.calls.size.should == 1
93
+ end
89
94
 
90
- ctx = LLM::Context.new(provider, tools: [])
91
- prompt = ctx.prompt { |p| p.system("sys"); p.user("hi") }
92
- env = build_env(
93
- context: ctx,
94
- provider: provider,
95
- input: prompt,
96
- streaming: true,
97
- callbacks: { on_content: callback }
98
- )
95
+ it "appends new messages to env[:messages]" do
96
+ provider = MockProvider.new
97
+ middleware = Brute::Middleware::LLMCall.new
98
+ env = build_env(provider: provider, input: "hello", streaming: false)
99
+ middleware.call(env)
100
+ env[:messages].should.not.be.empty
101
+ end
99
102
 
100
- middleware.call(env)
103
+ it "populates env[:pending_functions] as an Array" do
104
+ provider = MockProvider.new
105
+ middleware = Brute::Middleware::LLMCall.new
106
+ env = build_env(provider: provider, input: "hello", streaming: false)
107
+ middleware.call(env)
108
+ env[:pending_functions].should.be.kind_of(Array)
109
+ end
101
110
 
102
- expect(callback_called).to be false
103
- end
104
- end
111
+ it "does not fire on_content callback when streaming" do
112
+ provider = MockProvider.new
113
+ middleware = Brute::Middleware::LLMCall.new
114
+ called = false
115
+ env = build_env(provider: provider, input: "hi", streaming: true, callbacks: { on_content: ->(_) { called = true } })
116
+ middleware.call(env)
117
+ called.should.be.false
118
+ end
105
119
 
106
- context "when response content raises NoMethodError (tool-only response)" do
107
- it "does not crash and does not fire on_content" do
108
- received_content = :not_called
109
- callback = ->(text) { received_content = text }
110
-
111
- bad_response = MockResponse.new(content: "")
112
- allow(bad_response).to receive(:content).and_raise(NoMethodError)
113
- allow(provider).to receive(:complete).and_return(bad_response)
114
-
115
- ctx = LLM::Context.new(provider, tools: [])
116
- prompt = ctx.prompt { |p| p.system("sys"); p.user("hi") }
117
- env = build_env(
118
- context: ctx,
119
- provider: provider,
120
- input: prompt,
121
- streaming: false,
122
- callbacks: { on_content: callback }
123
- )
124
-
125
- expect { middleware.call(env) }.not_to raise_error
126
- expect(received_content).to eq(:not_called)
127
- end
128
- end
120
+ it "preserves existing messages across calls" do
121
+ provider = MockProvider.new
122
+ middleware = Brute::Middleware::LLMCall.new
123
+ existing = LLM::Message.new(:user, "previous")
124
+ env = build_env(provider: provider, input: "hello", streaming: false, messages: [existing])
125
+ middleware.call(env)
126
+ env[:messages].first.should == existing
129
127
  end
130
128
  end