brute 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/lib/brute/agent_stream.rb +118 -0
  3. data/lib/brute/diff.rb +34 -0
  4. data/lib/brute/message_store.rb +194 -0
  5. data/lib/brute/middleware/compaction_check.rb +133 -0
  6. data/lib/brute/middleware/doom_loop_detection.rb +100 -0
  7. data/lib/brute/middleware/llm_call.rb +89 -0
  8. data/lib/brute/middleware/message_tracking.rb +177 -0
  9. data/lib/brute/middleware/otel/span.rb +111 -0
  10. data/lib/brute/middleware/otel/token_usage.rb +93 -0
  11. data/lib/brute/middleware/otel/tool_calls.rb +113 -0
  12. data/lib/brute/middleware/otel/tool_results.rb +92 -0
  13. data/lib/brute/middleware/otel.rb +5 -0
  14. data/lib/brute/middleware/reasoning_normalizer.rb +119 -0
  15. data/lib/brute/middleware/retry.rb +93 -0
  16. data/lib/brute/middleware/session_persistence.rb +42 -0
  17. data/lib/brute/middleware/token_tracking.rb +77 -0
  18. data/lib/brute/middleware/tool_error_tracking.rb +101 -0
  19. data/lib/brute/middleware/tool_use_guard.rb +69 -0
  20. data/lib/brute/middleware/tracing.rb +71 -0
  21. data/lib/brute/orchestrator.rb +160 -1
  22. data/lib/brute/patches/buffer_nil_guard.rb +5 -0
  23. data/lib/brute/pipeline.rb +135 -0
  24. data/lib/brute/prompts/build_switch.rb +33 -0
  25. data/lib/brute/prompts/environment.rb +47 -0
  26. data/lib/brute/prompts/identity.rb +36 -0
  27. data/lib/brute/prompts/instructions.rb +24 -0
  28. data/lib/brute/prompts/max_steps.rb +32 -0
  29. data/lib/brute/prompts/plan_reminder.rb +33 -0
  30. data/lib/brute/prompts/skills.rb +35 -0
  31. data/lib/brute/providers/opencode_go.rb +5 -0
  32. data/lib/brute/providers/opencode_zen.rb +7 -2
  33. data/lib/brute/providers/shell_response.rb +5 -0
  34. data/lib/brute/system_prompt.rb +214 -0
  35. data/lib/brute/tools/delegate.rb +129 -0
  36. data/lib/brute/tools/fs_patch.rb +53 -0
  37. data/lib/brute/tools/fs_read.rb +5 -0
  38. data/lib/brute/tools/fs_remove.rb +5 -0
  39. data/lib/brute/tools/fs_search.rb +5 -0
  40. data/lib/brute/tools/fs_undo.rb +5 -0
  41. data/lib/brute/tools/fs_write.rb +50 -0
  42. data/lib/brute/tools/net_fetch.rb +5 -0
  43. data/lib/brute/tools/question.rb +5 -0
  44. data/lib/brute/tools/shell.rb +5 -0
  45. data/lib/brute/tools/todo_read.rb +5 -0
  46. data/lib/brute/tools/todo_write.rb +5 -0
  47. data/lib/brute/version.rb +1 -1
  48. data/lib/brute.rb +8 -8
  49. metadata +1 -1
@@ -1,5 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ if __FILE__ == $0
4
+ require "bundler/setup"
5
+ require "brute"
6
+ end
7
+
3
8
  module Brute
4
9
  module Middleware
5
10
  module OTel
@@ -35,3 +40,90 @@ module Brute
35
40
  end
36
41
  end
37
42
  end
43
+
44
+ if __FILE__ == $0
45
+ require_relative "../../../../spec/spec_helper"
46
+
47
+ RSpec.describe Brute::Middleware::OTel::ToolResults do
48
+ let(:response) { MockResponse.new(content: "processed") }
49
+ let(:inner_app) { ->(_env) { response } }
50
+ let(:middleware) { described_class.new(inner_app) }
51
+
52
+ it "passes the response through unchanged" do
53
+ env = build_env
54
+ result = middleware.call(env)
55
+ expect(result).to eq(response)
56
+ end
57
+
58
+ context "when env[:span] is nil" do
59
+ it "passes through without error" do
60
+ results = [["fs_read", { content: "data" }]]
61
+ env = build_env(tool_results: results)
62
+
63
+ result = middleware.call(env)
64
+ expect(result).to eq(response)
65
+ end
66
+ end
67
+
68
+ context "when env[:span] is present" do
69
+ let(:span) { mock_span }
70
+
71
+ it "does nothing when tool_results is nil" do
72
+ env = build_env(span: span, tool_results: nil)
73
+ middleware.call(env)
74
+
75
+ expect(span).not_to have_received(:add_event)
76
+ expect(span).not_to have_received(:set_attribute)
77
+ end
78
+
79
+ it "records a tool_result event per result" do
80
+ results = [
81
+ ["fs_read", { content: "file data" }],
82
+ ["shell", { output: "ok" }],
83
+ ]
84
+ env = build_env(span: span, tool_results: results)
85
+ middleware.call(env)
86
+
87
+ expect(span).to have_received(:set_attribute).with("brute.tool_results.count", 2)
88
+ expect(span).to have_received(:add_event).with(
89
+ "tool_result",
90
+ attributes: hash_including("tool.name" => "fs_read", "tool.status" => "ok")
91
+ )
92
+ expect(span).to have_received(:add_event).with(
93
+ "tool_result",
94
+ attributes: hash_including("tool.name" => "shell", "tool.status" => "ok")
95
+ )
96
+ end
97
+
98
+ it "records error status and message for failed tool results" do
99
+ results = [
100
+ ["fs_read", { error: "not found" }],
101
+ ]
102
+ env = build_env(span: span, tool_results: results)
103
+ middleware.call(env)
104
+
105
+ expect(span).to have_received(:add_event).with(
106
+ "tool_result",
107
+ attributes: hash_including(
108
+ "tool.name" => "fs_read",
109
+ "tool.status" => "error",
110
+ "tool.error" => "not found"
111
+ )
112
+ )
113
+ end
114
+
115
+ it "handles a mix of successful and failed results" do
116
+ results = [
117
+ ["fs_read", { content: "ok" }],
118
+ ["shell", { error: "exit code 1" }],
119
+ ["fs_write", { success: true }],
120
+ ]
121
+ env = build_env(span: span, tool_results: results)
122
+ middleware.call(env)
123
+
124
+ expect(span).to have_received(:set_attribute).with("brute.tool_results.count", 3)
125
+ expect(span).to have_received(:add_event).exactly(3).times
126
+ end
127
+ end
128
+ end
129
+ end
@@ -1,5 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ if __FILE__ == $0
4
+ require "bundler/setup"
5
+ require "brute"
6
+ end
7
+
3
8
  module Brute
4
9
  module Middleware
5
10
  # OpenTelemetry instrumentation for the LLM pipeline.
@@ -1,5 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ if __FILE__ == $0
4
+ require "bundler/setup"
5
+ require "brute"
6
+ end
7
+
3
8
  module Brute
4
9
  module Middleware
5
10
  # Handles reasoning/thinking content across model switches.
@@ -96,3 +101,117 @@ module Brute
96
101
  end
97
102
  end
98
103
  end
104
+
105
+ if __FILE__ == $0
106
+ require_relative "../../../spec/spec_helper"
107
+
108
+ RSpec.describe Brute::Middleware::ReasoningNormalizer do
109
+ let(:response) { MockResponse.new(content: "reasoned response") }
110
+ let(:inner_app) { ->(_env) { response } }
111
+
112
+ # Build a provider whose class name contains the given string.
113
+ def make_provider(type_name)
114
+ klass = Class.new do
115
+ define_method(:name) { :mock }
116
+ define_method(:default_model) { "mock-model" }
117
+ define_method(:user_role) { :user }
118
+ define_method(:system_role) { :system }
119
+ define_method(:assistant_role) { :assistant }
120
+ define_method(:tool_role) { :tool }
121
+ define_method(:tracer) { nil }
122
+ define_method(:tracer=) { |*| }
123
+ define_method(:complete) { |*_args, **_kw| MockResponse.new(content: "ok") }
124
+ end
125
+ # Override class name to trigger provider_type detection
126
+ klass.define_method(:class) do
127
+ c = super()
128
+ name_str = "LLM::#{type_name}"
129
+ c.define_singleton_method(:name) { name_str }
130
+ c
131
+ end
132
+ klass.new
133
+ end
134
+
135
+ context "with Anthropic provider and budget_tokens" do
136
+ it "injects thinking param into env[:params]" do
137
+ provider = make_provider("Anthropic")
138
+ middleware = described_class.new(inner_app, model_id: "claude-4", budget_tokens: 8000, enabled: true)
139
+ env = build_env(provider: provider, params: {})
140
+
141
+ middleware.call(env)
142
+
143
+ expect(env[:params][:thinking]).to eq({ type: "enabled", budget_tokens: 8000 })
144
+ end
145
+ end
146
+
147
+ context "with Anthropic provider without budget_tokens" do
148
+ it "does not inject thinking param" do
149
+ provider = make_provider("Anthropic")
150
+ middleware = described_class.new(inner_app, model_id: "claude-4", enabled: true)
151
+ env = build_env(provider: provider, params: {})
152
+
153
+ middleware.call(env)
154
+
155
+ expect(env[:params][:thinking]).to be_nil
156
+ end
157
+ end
158
+
159
+ context "with OpenAI provider" do
160
+ it "injects reasoning_effort based on effort level" do
161
+ provider = make_provider("OpenAI")
162
+ middleware = described_class.new(inner_app, model_id: "o3", effort: :high, enabled: true)
163
+ env = build_env(provider: provider, params: {})
164
+
165
+ middleware.call(env)
166
+
167
+ expect(env[:params][:reasoning_effort]).to eq("high")
168
+ end
169
+
170
+ it "maps effort levels correctly" do
171
+ provider = make_provider("OpenAI")
172
+
173
+ { low: "low", medium: "medium", high: "high", minimal: "low", max: "high" }.each do |effort, expected|
174
+ middleware = described_class.new(inner_app, model_id: "o3", effort: effort, enabled: true)
175
+ env = build_env(provider: provider, params: {})
176
+ middleware.call(env)
177
+ expect(env[:params][:reasoning_effort]).to eq(expected), "Expected effort #{effort} to map to #{expected}"
178
+ end
179
+ end
180
+ end
181
+
182
+ context "with unknown provider" do
183
+ it "does not inject any reasoning params" do
184
+ provider = make_provider("Mistral")
185
+ middleware = described_class.new(inner_app, model_id: "mistral-large", enabled: true)
186
+ env = build_env(provider: provider, params: {})
187
+
188
+ middleware.call(env)
189
+
190
+ expect(env[:params]).to eq({})
191
+ end
192
+ end
193
+
194
+ context "when disabled" do
195
+ it "does not inject reasoning params" do
196
+ provider = make_provider("Anthropic")
197
+ middleware = described_class.new(inner_app, model_id: "claude-4", budget_tokens: 8000, enabled: false)
198
+ env = build_env(provider: provider, params: {})
199
+
200
+ middleware.call(env)
201
+
202
+ expect(env[:params]).to eq({})
203
+ end
204
+ end
205
+
206
+ it "allows model_id to be updated mid-session" do
207
+ middleware = described_class.new(inner_app, model_id: "old-model", enabled: true)
208
+ middleware.model_id = "new-model"
209
+
210
+ provider = make_provider("OpenAI")
211
+ env = build_env(provider: provider, params: {})
212
+ middleware.call(env)
213
+
214
+ expect(env[:params][:reasoning_effort]).not_to be_nil
215
+ end
216
+ end
217
+ end
@@ -1,5 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ if __FILE__ == $0
4
+ require "bundler/setup"
5
+ require "brute"
6
+ end
7
+
3
8
  module Brute
4
9
  module Middleware
5
10
  # Retries the inner call on transient LLM errors with exponential backoff.
@@ -43,3 +48,91 @@ module Brute
43
48
  end
44
49
  end
45
50
  end
51
+
52
+ if __FILE__ == $0
53
+ require_relative "../../../spec/spec_helper"
54
+
55
+ RSpec.describe Brute::Middleware::Retry do
56
+ let(:response) { MockResponse.new(content: "success") }
57
+
58
+ it "returns the response on first successful call" do
59
+ app, calls = mock_inner_app(response: response)
60
+ middleware = described_class.new(app)
61
+ env = build_env
62
+
63
+ result = middleware.call(env)
64
+
65
+ expect(result).to eq(response)
66
+ expect(calls.size).to eq(1)
67
+ end
68
+
69
+ it "retries on LLM::RateLimitError and succeeds" do
70
+ app = flaky_inner_app(LLM::RateLimitError, fail_count: 2, response: response)
71
+ middleware = described_class.new(app, max_attempts: 3, base_delay: 2)
72
+ allow(middleware).to receive(:sleep)
73
+ env = build_env
74
+
75
+ result = middleware.call(env)
76
+
77
+ expect(result).to eq(response)
78
+ expect(env[:metadata][:retry_attempt]).to eq(2)
79
+ end
80
+
81
+ it "retries on LLM::ServerError and succeeds" do
82
+ app = flaky_inner_app(LLM::ServerError, fail_count: 1, response: response)
83
+ middleware = described_class.new(app, max_attempts: 3, base_delay: 2)
84
+ allow(middleware).to receive(:sleep)
85
+ env = build_env
86
+
87
+ result = middleware.call(env)
88
+
89
+ expect(result).to eq(response)
90
+ expect(env[:metadata][:retry_attempt]).to eq(1)
91
+ end
92
+
93
+ it "re-raises after exhausting all attempts" do
94
+ app = failing_inner_app(LLM::RateLimitError, message: "rate limited")
95
+ middleware = described_class.new(app, max_attempts: 3, base_delay: 2)
96
+ allow(middleware).to receive(:sleep)
97
+ env = build_env
98
+
99
+ expect { middleware.call(env) }.to raise_error(LLM::RateLimitError, "rate limited")
100
+ expect(env[:metadata][:last_error]).to eq("rate limited")
101
+ end
102
+
103
+ it "does not retry non-retryable errors" do
104
+ call_count = 0
105
+ app = ->(_env) { call_count += 1; raise ArgumentError, "bad input" }
106
+ middleware = described_class.new(app)
107
+ env = build_env
108
+
109
+ expect { middleware.call(env) }.to raise_error(ArgumentError)
110
+ expect(call_count).to eq(1)
111
+ end
112
+
113
+ it "sleeps with exponential backoff delays" do
114
+ app = flaky_inner_app(LLM::RateLimitError, fail_count: 2, response: response)
115
+ middleware = described_class.new(app, max_attempts: 3, base_delay: 2)
116
+ allow(middleware).to receive(:sleep)
117
+ env = build_env
118
+
119
+ middleware.call(env)
120
+
121
+ # base_delay ** attempts: 2**1 = 2, 2**2 = 4
122
+ expect(middleware).to have_received(:sleep).with(2).ordered
123
+ expect(middleware).to have_received(:sleep).with(4).ordered
124
+ end
125
+
126
+ it "records retry_delay in metadata" do
127
+ app = flaky_inner_app(LLM::RateLimitError, fail_count: 1, response: response)
128
+ middleware = described_class.new(app, max_attempts: 3, base_delay: 3)
129
+ allow(middleware).to receive(:sleep)
130
+ env = build_env
131
+
132
+ middleware.call(env)
133
+
134
+ # base_delay ** attempts: 3**1 = 3
135
+ expect(env[:metadata][:retry_delay]).to eq(3)
136
+ end
137
+ end
138
+ end
@@ -1,5 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ if __FILE__ == $0
4
+ require "bundler/setup"
5
+ require "brute"
6
+ end
7
+
3
8
  module Brute
4
9
  module Middleware
5
10
  # Saves the conversation to disk after each LLM call.
@@ -27,3 +32,40 @@ module Brute
27
32
  end
28
33
  end
29
34
  end
35
+
36
+ if __FILE__ == $0
37
+ require_relative "../../../spec/spec_helper"
38
+
39
+ RSpec.describe Brute::Middleware::SessionPersistence do
40
+ let(:response) { MockResponse.new(content: "saved response") }
41
+ let(:inner_app) { ->(_env) { response } }
42
+ let(:session) { double("session", save: nil) }
43
+ let(:middleware) { described_class.new(inner_app, session: session) }
44
+
45
+ it "passes the response through unchanged" do
46
+ env = build_env
47
+ result = middleware.call(env)
48
+ expect(result).to eq(response)
49
+ end
50
+
51
+ it "calls session.save with the context after a successful LLM call" do
52
+ env = build_env
53
+ middleware.call(env)
54
+ expect(session).to have_received(:save).with(env[:context])
55
+ end
56
+
57
+ it "does not propagate session save failures" do
58
+ allow(session).to receive(:save).and_raise(RuntimeError, "disk full")
59
+ env = build_env
60
+
61
+ expect { middleware.call(env) }.not_to raise_error
62
+ end
63
+
64
+ it "prints a warning to stderr on save failure" do
65
+ allow(session).to receive(:save).and_raise(RuntimeError, "disk full")
66
+ env = build_env
67
+
68
+ expect { middleware.call(env) }.to output(/Session save failed: disk full/).to_stderr
69
+ end
70
+ end
71
+ end
@@ -1,5 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ if __FILE__ == $0
4
+ require "bundler/setup"
5
+ require "brute"
6
+ end
7
+
3
8
  module Brute
4
9
  module Middleware
5
10
  # Tracks cumulative token usage across all LLM calls in a session.
@@ -44,3 +49,75 @@ module Brute
44
49
  end
45
50
  end
46
51
  end
52
+
53
+ if __FILE__ == $0
54
+ require_relative "../../../spec/spec_helper"
55
+
56
+ RSpec.describe Brute::Middleware::TokenTracking do
57
+ let(:response) do
58
+ MockResponse.new(
59
+ content: "hello",
60
+ usage: LLM::Usage.new(input_tokens: 100, output_tokens: 50, reasoning_tokens: 10, total_tokens: 160)
61
+ )
62
+ end
63
+
64
+ let(:inner_app) { ->(_env) { response } }
65
+ let(:middleware) { described_class.new(inner_app) }
66
+
67
+ it "passes the response through unchanged" do
68
+ env = build_env
69
+ result = middleware.call(env)
70
+ expect(result).to eq(response)
71
+ end
72
+
73
+ it "populates env[:metadata][:tokens] with correct values" do
74
+ env = build_env
75
+ middleware.call(env)
76
+
77
+ tokens = env[:metadata][:tokens]
78
+ expect(tokens[:total_input]).to eq(100)
79
+ expect(tokens[:total_output]).to eq(50)
80
+ expect(tokens[:total_reasoning]).to eq(10)
81
+ expect(tokens[:total]).to eq(150) # input + output
82
+ expect(tokens[:call_count]).to eq(1)
83
+ expect(tokens[:last_call]).to eq(input: 100, output: 50, total: 160)
84
+ end
85
+
86
+ it "accumulates token counts across multiple calls" do
87
+ env = build_env
88
+ middleware.call(env)
89
+ middleware.call(env)
90
+
91
+ tokens = env[:metadata][:tokens]
92
+ expect(tokens[:total_input]).to eq(200)
93
+ expect(tokens[:total_output]).to eq(100)
94
+ expect(tokens[:total_reasoning]).to eq(20)
95
+ expect(tokens[:call_count]).to eq(2)
96
+ end
97
+
98
+ it "handles a response without usage gracefully" do
99
+ no_usage_response = double("response")
100
+ allow(no_usage_response).to receive(:respond_to?).with(:usage).and_return(false)
101
+ app = ->(_env) { no_usage_response }
102
+ mw = described_class.new(app)
103
+
104
+ env = build_env
105
+ result = mw.call(env)
106
+
107
+ expect(result).to eq(no_usage_response)
108
+ expect(env[:metadata][:tokens]).to be_nil
109
+ end
110
+
111
+ it "handles a response where usage returns nil" do
112
+ nil_usage_response = double("response", usage: nil)
113
+ allow(nil_usage_response).to receive(:respond_to?).with(:usage).and_return(true)
114
+ app = ->(_env) { nil_usage_response }
115
+ mw = described_class.new(app)
116
+
117
+ env = build_env
118
+ mw.call(env)
119
+
120
+ expect(env[:metadata][:tokens]).to be_nil
121
+ end
122
+ end
123
+ end
@@ -1,5 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ if __FILE__ == $0
4
+ require "bundler/setup"
5
+ require "brute"
6
+ end
7
+
3
8
  module Brute
4
9
  module Middleware
5
10
  # Tracks per-tool error counts and total tool call count across LLM
@@ -50,3 +55,99 @@ module Brute
50
55
  end
51
56
  end
52
57
  end
58
+
59
+ if __FILE__ == $0
60
+ require_relative "../../../spec/spec_helper"
61
+
62
+ RSpec.describe Brute::Middleware::ToolErrorTracking do
63
+ let(:response) { MockResponse.new(content: "tracked") }
64
+ let(:inner_app) { ->(_env) { response } }
65
+ let(:middleware) { described_class.new(inner_app, max_failures: 3) }
66
+
67
+ it "passes the response through" do
68
+ env = build_env
69
+ result = middleware.call(env)
70
+ expect(result).to eq(response)
71
+ end
72
+
73
+ it "reports zero tool calls when tool_results is nil" do
74
+ env = build_env(tool_results: nil)
75
+ middleware.call(env)
76
+
77
+ expect(env[:metadata][:tool_calls]).to eq(0)
78
+ expect(env[:metadata][:tool_errors]).to eq({})
79
+ expect(env[:metadata][:tool_error_limit_reached]).to be false
80
+ end
81
+
82
+ it "counts total tool calls from tool_results" do
83
+ results = [
84
+ ["fs_read", { content: "data" }],
85
+ ["shell", { output: "ok" }],
86
+ ["fs_write", { success: true }],
87
+ ]
88
+ env = build_env(tool_results: results)
89
+ middleware.call(env)
90
+
91
+ expect(env[:metadata][:tool_calls]).to eq(3)
92
+ end
93
+
94
+ it "counts per-tool errors from results with error key" do
95
+ results = [
96
+ ["fs_read", { error: "not found" }],
97
+ ["fs_read", { error: "permission denied" }],
98
+ ["shell", { output: "ok" }],
99
+ ]
100
+ env = build_env(tool_results: results)
101
+ middleware.call(env)
102
+
103
+ expect(env[:metadata][:tool_errors]).to eq({ "fs_read" => 2 })
104
+ end
105
+
106
+ it "sets tool_error_limit_reached when a tool hits max_failures" do
107
+ results = [
108
+ ["fs_read", { error: "fail 1" }],
109
+ ["fs_read", { error: "fail 2" }],
110
+ ["fs_read", { error: "fail 3" }],
111
+ ]
112
+ env = build_env(tool_results: results)
113
+ middleware.call(env)
114
+
115
+ expect(env[:metadata][:tool_error_limit_reached]).to be true
116
+ end
117
+
118
+ it "does not flag below the threshold" do
119
+ results = [
120
+ ["fs_read", { error: "fail 1" }],
121
+ ["fs_read", { error: "fail 2" }],
122
+ ]
123
+ env = build_env(tool_results: results)
124
+ middleware.call(env)
125
+
126
+ expect(env[:metadata][:tool_error_limit_reached]).to be false
127
+ end
128
+
129
+ it "accumulates counts across multiple calls" do
130
+ env1 = build_env(tool_results: [["fs_read", { error: "fail" }]])
131
+ middleware.call(env1)
132
+
133
+ env2 = build_env(tool_results: [["fs_read", { error: "fail again" }], ["shell", { output: "ok" }]])
134
+ middleware.call(env2)
135
+
136
+ expect(env2[:metadata][:tool_calls]).to eq(3) # 1 + 2
137
+ expect(env2[:metadata][:tool_errors]).to eq({ "fs_read" => 2 })
138
+ end
139
+
140
+ it "clears counters on reset!" do
141
+ env = build_env(tool_results: [["fs_read", { error: "fail" }]])
142
+ middleware.call(env)
143
+
144
+ middleware.reset!
145
+
146
+ env2 = build_env(tool_results: nil)
147
+ middleware.call(env2)
148
+
149
+ expect(env2[:metadata][:tool_calls]).to eq(0)
150
+ expect(env2[:metadata][:tool_errors]).to eq({})
151
+ end
152
+ end
153
+ end
@@ -95,3 +95,72 @@ module Brute
95
95
  end
96
96
  end
97
97
  end
98
+
99
+ if __FILE__ == $0
100
+ require_relative "../../../spec/spec_helper"
101
+
102
+ RSpec.describe Brute::Middleware::ToolUseGuard do
103
+ let(:provider) { MockProvider.new }
104
+
105
+ # Helper: build a response that produces pending tool calls (functions) in the context.
106
+ def make_tool_response(tool_calls:)
107
+ MockResponse.new(content: "", tool_calls: tool_calls)
108
+ end
109
+
110
+ it "passes the response through when there are no pending functions" do
111
+ response = MockResponse.new(content: "no tools")
112
+ allow(provider).to receive(:complete).and_return(response)
113
+
114
+ ctx = LLM::Context.new(provider, tools: [])
115
+ prompt = ctx.prompt { |p| p.system("sys"); p.user("hi") }
116
+
117
+ inner_app = ->(_env) { ctx.talk(prompt); response }
118
+ middleware = described_class.new(inner_app)
119
+ env = build_env(context: ctx, provider: provider)
120
+
121
+ result = middleware.call(env)
122
+ expect(result).to eq(response)
123
+ end
124
+
125
+ it "does not inject a synthetic message when the assistant message already has tool_call?" do
126
+ tool_calls = [{ id: "toolu_1", name: "fs_read", arguments: { "path" => "test.rb" } }]
127
+ response = make_tool_response(tool_calls: tool_calls)
128
+ allow(provider).to receive(:complete).and_return(response)
129
+
130
+ ctx = LLM::Context.new(provider, tools: [])
131
+ prompt = ctx.prompt { |p| p.system("sys"); p.user("read it") }
132
+
133
+ inner_app = ->(_env) { ctx.talk(prompt); response }
134
+ middleware = described_class.new(inner_app)
135
+ env = build_env(context: ctx, provider: provider)
136
+
137
+ middleware.call(env)
138
+
139
+ messages = ctx.messages.to_a
140
+ assistant_msgs = messages.select { |m| m.role.to_s == "assistant" }
141
+ # Should only have the original assistant message, no synthetic
142
+ expect(assistant_msgs.size).to eq(1)
143
+ end
144
+
145
+ it "injects a synthetic assistant message when tool calls exist but assistant is missing" do
146
+ tool_calls = [{ id: "toolu_1", name: "fs_read", arguments: { "path" => "test.rb" } }]
147
+ response = MockResponse.new(content: "")
148
+ # Simulate the bug: choices[-1] is nil, so no assistant message stored
149
+ allow(response).to receive(:choices).and_return([nil])
150
+ allow(provider).to receive(:complete).and_return(response)
151
+
152
+ ctx = LLM::Context.new(provider, tools: [])
153
+ prompt = ctx.prompt { |p| p.system("sys"); p.user("read it") }
154
+
155
+ inner_app = ->(_env) do
156
+ ctx.talk(prompt)
157
+ response
158
+ end
159
+
160
+ middleware = described_class.new(inner_app)
161
+ env = build_env(context: ctx, provider: provider)
162
+
163
+ expect { middleware.call(env) }.not_to raise_error
164
+ end
165
+ end
166
+ end