brute 0.4.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/lib/brute/agent.rb +14 -0
  3. data/lib/brute/diff.rb +24 -0
  4. data/lib/brute/loop/agent_stream.rb +118 -0
  5. data/lib/brute/loop/agent_turn.rb +520 -0
  6. data/lib/brute/{compactor.rb → loop/compactor.rb} +2 -0
  7. data/lib/brute/{doom_loop.rb → loop/doom_loop.rb} +2 -0
  8. data/lib/brute/loop/step.rb +332 -0
  9. data/lib/brute/loop/tool_call_step.rb +90 -0
  10. data/lib/brute/middleware/compaction_check.rb +70 -23
  11. data/lib/brute/middleware/doom_loop_detection.rb +110 -7
  12. data/lib/brute/middleware/llm_call.rb +88 -1
  13. data/lib/brute/middleware/message_tracking.rb +140 -10
  14. data/lib/brute/middleware/otel/span.rb +32 -2
  15. data/lib/brute/middleware/otel/token_usage.rb +38 -0
  16. data/lib/brute/middleware/otel/tool_calls.rb +30 -1
  17. data/lib/brute/middleware/otel/tool_results.rb +29 -1
  18. data/lib/brute/middleware/otel.rb +5 -0
  19. data/lib/brute/middleware/reasoning_normalizer.rb +94 -0
  20. data/lib/brute/middleware/retry.rb +113 -1
  21. data/lib/brute/middleware/session_persistence.rb +46 -3
  22. data/lib/brute/middleware/token_tracking.rb +78 -0
  23. data/lib/brute/middleware/tool_error_tracking.rb +128 -1
  24. data/lib/brute/middleware/tool_use_guard.rb +64 -28
  25. data/lib/brute/middleware/tracing.rb +63 -2
  26. data/lib/brute/middleware.rb +18 -0
  27. data/lib/brute/orchestrator/turn.rb +105 -0
  28. data/lib/brute/patches/buffer_nil_guard.rb +5 -0
  29. data/lib/brute/pipeline.rb +86 -7
  30. data/lib/brute/prompts/build_switch.rb +29 -0
  31. data/lib/brute/prompts/environment.rb +43 -0
  32. data/lib/brute/prompts/identity.rb +29 -0
  33. data/lib/brute/prompts/instructions.rb +21 -0
  34. data/lib/brute/prompts/max_steps.rb +25 -0
  35. data/lib/brute/prompts/plan_reminder.rb +25 -0
  36. data/lib/brute/prompts/skills.rb +13 -0
  37. data/lib/brute/prompts.rb +28 -0
  38. data/lib/brute/providers/ollama.rb +135 -0
  39. data/lib/brute/providers/opencode_go.rb +5 -0
  40. data/lib/brute/providers/opencode_zen.rb +7 -2
  41. data/lib/brute/providers/shell.rb +2 -2
  42. data/lib/brute/providers/shell_response.rb +7 -2
  43. data/lib/brute/providers.rb +62 -0
  44. data/lib/brute/queue/base_queue.rb +222 -0
  45. data/lib/brute/{file_mutation_queue.rb → queue/file_mutation_queue.rb} +28 -26
  46. data/lib/brute/queue/parallel_queue.rb +66 -0
  47. data/lib/brute/queue/sequential_queue.rb +63 -0
  48. data/lib/brute/{message_store.rb → store/message_store.rb} +155 -62
  49. data/lib/brute/store/session.rb +106 -0
  50. data/lib/brute/{snapshot_store.rb → store/snapshot_store.rb} +2 -0
  51. data/lib/brute/{todo_store.rb → store/todo_store.rb} +2 -0
  52. data/lib/brute/system_prompt.rb +101 -0
  53. data/lib/brute/tools/delegate.rb +59 -0
  54. data/lib/brute/tools/fs_patch.rb +54 -2
  55. data/lib/brute/tools/fs_read.rb +5 -0
  56. data/lib/brute/tools/fs_remove.rb +7 -2
  57. data/lib/brute/tools/fs_search.rb +5 -0
  58. data/lib/brute/tools/fs_undo.rb +7 -2
  59. data/lib/brute/tools/fs_write.rb +40 -2
  60. data/lib/brute/tools/net_fetch.rb +5 -0
  61. data/lib/brute/tools/question.rb +5 -0
  62. data/lib/brute/tools/shell.rb +5 -0
  63. data/lib/brute/tools/todo_read.rb +6 -1
  64. data/lib/brute/tools/todo_write.rb +6 -1
  65. data/lib/brute/tools.rb +31 -0
  66. data/lib/brute/version.rb +1 -1
  67. data/lib/brute.rb +40 -204
  68. metadata +31 -20
  69. data/lib/brute/agent_stream.rb +0 -63
  70. data/lib/brute/hooks.rb +0 -84
  71. data/lib/brute/orchestrator.rb +0 -391
  72. data/lib/brute/session.rb +0 -161
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
4
+ require "brute"
5
+
3
6
  module Brute
4
7
  module Middleware
5
8
  # Handles reasoning/thinking content across model switches.
@@ -96,3 +99,94 @@ module Brute
96
99
  end
97
100
  end
98
101
  end
102
+
103
+ test do
104
+ require_relative "../../../spec/support/mock_provider"
105
+ require_relative "../../../spec/support/mock_response"
106
+
107
+ def build_env(**overrides)
108
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
109
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
110
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
111
+ end
112
+
113
+ def make_provider(type_name)
114
+ klass = Class.new do
115
+ define_method(:name) { :mock }
116
+ define_method(:default_model) { "mock-model" }
117
+ define_method(:user_role) { :user }
118
+ define_method(:system_role) { :system }
119
+ define_method(:assistant_role) { :assistant }
120
+ define_method(:tool_role) { :tool }
121
+ define_method(:tracer) { nil }
122
+ define_method(:tracer=) { |*| }
123
+ define_method(:complete) { |*_args, **_kw| MockResponse.new(content: "ok") }
124
+ end
125
+ klass.define_method(:class) do
126
+ c = super()
127
+ name_str = "LLM::#{type_name}"
128
+ c.define_singleton_method(:name) { name_str }
129
+ c
130
+ end
131
+ klass.new
132
+ end
133
+
134
+ inner_app = ->(_env) { MockResponse.new(content: "reasoned response") }
135
+
136
+ it "injects thinking param for Anthropic with budget_tokens" do
137
+ provider = make_provider("Anthropic")
138
+ middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "claude-4", budget_tokens: 8000, enabled: true)
139
+ env = build_env(provider: provider, params: {})
140
+ middleware.call(env)
141
+ env[:params][:thinking].should == { type: "enabled", budget_tokens: 8000 }
142
+ end
143
+
144
+ it "does not inject thinking param for Anthropic without budget_tokens" do
145
+ provider = make_provider("Anthropic")
146
+ middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "claude-4", enabled: true)
147
+ env = build_env(provider: provider, params: {})
148
+ middleware.call(env)
149
+ env[:params][:thinking].should.be.nil
150
+ end
151
+
152
+ it "injects reasoning_effort for OpenAI" do
153
+ provider = make_provider("OpenAI")
154
+ middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "o3", effort: :high, enabled: true)
155
+ env = build_env(provider: provider, params: {})
156
+ middleware.call(env)
157
+ env[:params][:reasoning_effort].should == "high"
158
+ end
159
+
160
+ it "maps low effort correctly for OpenAI" do
161
+ provider = make_provider("OpenAI")
162
+ middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "o3", effort: :low, enabled: true)
163
+ env = build_env(provider: provider, params: {})
164
+ middleware.call(env)
165
+ env[:params][:reasoning_effort].should == "low"
166
+ end
167
+
168
+ it "does not inject params for unknown provider" do
169
+ provider = make_provider("Mistral")
170
+ middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "mistral-large", enabled: true)
171
+ env = build_env(provider: provider, params: {})
172
+ middleware.call(env)
173
+ env[:params].should == {}
174
+ end
175
+
176
+ it "does not inject params when disabled" do
177
+ provider = make_provider("Anthropic")
178
+ middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "claude-4", budget_tokens: 8000, enabled: false)
179
+ env = build_env(provider: provider, params: {})
180
+ middleware.call(env)
181
+ env[:params].should == {}
182
+ end
183
+
184
+ it "allows model_id to be updated mid-session" do
185
+ middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "old", enabled: true)
186
+ middleware.model_id = "new"
187
+ provider = make_provider("OpenAI")
188
+ env = build_env(provider: provider, params: {})
189
+ middleware.call(env)
190
+ env[:params][:reasoning_effort].should.not.be.nil
191
+ end
192
+ end
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
4
+ require "brute"
5
+
3
6
  module Brute
4
7
  module Middleware
5
8
  # Retries the inner call on transient LLM errors with exponential backoff.
@@ -9,7 +12,7 @@ module Brute
9
12
  # propagate immediately.
10
13
  #
11
14
  # Unlike forgecode's separate retry.rs, this middleware wraps the LLM call
12
- # directly — it sees the error and retries without the orchestrator knowing.
15
+ # directly — it sees the error and retries without the agent loop knowing.
13
16
  #
14
17
  class Retry < Base
15
18
  DEFAULT_MAX_ATTEMPTS = 3
@@ -43,3 +46,112 @@ module Brute
43
46
  end
44
47
  end
45
48
  end
49
+
50
+ test do
51
+ require_relative "../../../spec/support/mock_provider"
52
+ require_relative "../../../spec/support/mock_response"
53
+
54
+ def build_env(**overrides)
55
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
56
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
57
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
58
+ end
59
+
60
+ def mock_inner_app(response:)
61
+ calls = []
62
+ app = ->(env) { calls << env; response }
63
+ [app, calls]
64
+ end
65
+
66
+ def flaky_inner_app(error_class, fail_count:, response:)
67
+ attempt = 0
68
+ ->(env) { attempt += 1; raise error_class, "transient" if attempt <= fail_count; response }
69
+ end
70
+
71
+ def no_sleep_retry(*args, **kwargs)
72
+ mw = Brute::Middleware::Retry.new(*args, **kwargs)
73
+ mw.define_singleton_method(:sleep) { |_| }
74
+ mw
75
+ end
76
+
77
+ it "returns the response on first successful call" do
78
+ response = MockResponse.new(content: "success")
79
+ app, calls = mock_inner_app(response: response)
80
+ middleware = Brute::Middleware::Retry.new(app)
81
+ result = middleware.call(build_env)
82
+ result.should == response
83
+ end
84
+
85
+ it "calls inner app exactly once on success" do
86
+ response = MockResponse.new(content: "success")
87
+ app, calls = mock_inner_app(response: response)
88
+ Brute::Middleware::Retry.new(app).call(build_env)
89
+ calls.size.should == 1
90
+ end
91
+
92
+ it "retries on LLM::RateLimitError and succeeds" do
93
+ response = MockResponse.new(content: "success")
94
+ app = flaky_inner_app(LLM::RateLimitError, fail_count: 2, response: response)
95
+ middleware = no_sleep_retry(app, max_attempts: 3, base_delay: 2)
96
+ env = build_env
97
+ result = middleware.call(env)
98
+ result.should == response
99
+ end
100
+
101
+ it "records retry_attempt in metadata after retries" do
102
+ response = MockResponse.new(content: "success")
103
+ app = flaky_inner_app(LLM::RateLimitError, fail_count: 2, response: response)
104
+ middleware = no_sleep_retry(app, max_attempts: 3, base_delay: 2)
105
+ env = build_env
106
+ middleware.call(env)
107
+ env[:metadata][:retry_attempt].should == 2
108
+ end
109
+
110
+ it "retries on LLM::ServerError and succeeds" do
111
+ response = MockResponse.new(content: "success")
112
+ app = flaky_inner_app(LLM::ServerError, fail_count: 1, response: response)
113
+ middleware = no_sleep_retry(app, max_attempts: 3, base_delay: 2)
114
+ result = middleware.call(build_env)
115
+ result.should == response
116
+ end
117
+
118
+ it "re-raises after exhausting all attempts" do
119
+ app = ->(_env) { raise LLM::RateLimitError, "rate limited" }
120
+ middleware = no_sleep_retry(app, max_attempts: 3, base_delay: 2)
121
+ lambda { middleware.call(build_env) }.should.raise(LLM::RateLimitError)
122
+ end
123
+
124
+ it "does not retry non-retryable errors" do
125
+ call_count = 0
126
+ app = ->(_env) { call_count += 1; raise ArgumentError, "bad input" }
127
+ middleware = Brute::Middleware::Retry.new(app)
128
+ lambda { middleware.call(build_env) }.should.raise(ArgumentError)
129
+ end
130
+
131
+ it "only calls inner app once for non-retryable errors" do
132
+ call_count = 0
133
+ app = ->(_env) { call_count += 1; raise ArgumentError, "bad input" }
134
+ middleware = Brute::Middleware::Retry.new(app)
135
+ begin; middleware.call(build_env); rescue ArgumentError; end
136
+ call_count.should == 1
137
+ end
138
+
139
+ it "records retry_delay in metadata" do
140
+ response = MockResponse.new(content: "success")
141
+ app = flaky_inner_app(LLM::RateLimitError, fail_count: 1, response: response)
142
+ middleware = no_sleep_retry(app, max_attempts: 3, base_delay: 3)
143
+ env = build_env
144
+ middleware.call(env)
145
+ env[:metadata][:retry_delay].should == 3
146
+ end
147
+
148
+ it "tracks sleep delays for exponential backoff" do
149
+ response = MockResponse.new(content: "success")
150
+ app = flaky_inner_app(LLM::RateLimitError, fail_count: 2, response: response)
151
+ delays = []
152
+ mw = Brute::Middleware::Retry.new(app, max_attempts: 3, base_delay: 2)
153
+ mw.define_singleton_method(:sleep) { |d| delays << d }
154
+ mw.call(build_env)
155
+ delays.should == [2, 4]
156
+ end
157
+ end
@@ -1,11 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
4
+ require "brute"
5
+
3
6
  module Brute
4
7
  module Middleware
5
8
  # Saves the conversation to disk after each LLM call.
6
9
  #
7
- # Runs POST-call: delegates to Session#save. Failures are non-fatal —
8
- # a broken session save should never crash the agent loop.
10
+ # Runs POST-call: serializes env[:messages] via Session#save_messages.
11
+ # Failures are non-fatal — a broken session save should never crash
12
+ # the agent loop.
9
13
  #
10
14
  class SessionPersistence < Base
11
15
  def initialize(app, session:)
@@ -17,7 +21,7 @@ module Brute
17
21
  response = @app.call(env)
18
22
 
19
23
  begin
20
- @session.save(env[:context])
24
+ @session.save_messages(env[:messages])
21
25
  rescue => e
22
26
  warn "[brute] Session save failed: #{e.message}"
23
27
  end
@@ -27,3 +31,42 @@ module Brute
27
31
  end
28
32
  end
29
33
  end
34
+
35
+ test do
36
+ require_relative "../../../spec/support/mock_provider"
37
+ require_relative "../../../spec/support/mock_response"
38
+
39
+ def build_env(**overrides)
40
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
41
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
42
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
43
+ end
44
+
45
+ it "passes the response through unchanged" do
46
+ response = MockResponse.new(content: "saved response")
47
+ session = Struct.new(:saved) { def save_messages(m); self.saved = m; end }.new
48
+ inner_app = ->(_env) { response }
49
+ middleware = Brute::Middleware::SessionPersistence.new(inner_app, session: session)
50
+ result = middleware.call(build_env)
51
+ result.should == response
52
+ end
53
+
54
+ it "calls session.save_messages with env messages" do
55
+ response = MockResponse.new(content: "saved response")
56
+ session = Struct.new(:saved) { def save_messages(m); self.saved = m; end }.new
57
+ inner_app = ->(_env) { response }
58
+ middleware = Brute::Middleware::SessionPersistence.new(inner_app, session: session)
59
+ messages = [LLM::Message.new(:user, "hello")]
60
+ middleware.call(build_env(messages: messages))
61
+ session.saved.should == messages
62
+ end
63
+
64
+ it "does not propagate session save failures" do
65
+ response = MockResponse.new(content: "saved response")
66
+ session = Object.new
67
+ session.define_singleton_method(:save_messages) { |_| raise RuntimeError, "disk full" }
68
+ inner_app = ->(_env) { response }
69
+ middleware = Brute::Middleware::SessionPersistence.new(inner_app, session: session)
70
+ lambda { middleware.call(build_env) }.should.not.raise
71
+ end
72
+ end
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
4
+ require "brute"
5
+
3
6
  module Brute
4
7
  module Middleware
5
8
  # Tracks cumulative token usage across all LLM calls in a session.
@@ -44,3 +47,78 @@ module Brute
44
47
  end
45
48
  end
46
49
  end
50
+
51
+ test do
52
+ require_relative "../../../spec/support/mock_provider"
53
+ require_relative "../../../spec/support/mock_response"
54
+
55
+ def build_env(**overrides)
56
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
57
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
58
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
59
+ end
60
+
61
+ def make_response
62
+ MockResponse.new(content: "hello",
63
+ usage: LLM::Usage.new(input_tokens: 100, output_tokens: 50, reasoning_tokens: 10, total_tokens: 160))
64
+ end
65
+
66
+ it "passes the response through unchanged" do
67
+ response = make_response
68
+ middleware = Brute::Middleware::TokenTracking.new(->(_env) { response })
69
+ result = middleware.call(build_env)
70
+ result.should == response
71
+ end
72
+
73
+ it "populates total_input tokens" do
74
+ middleware = Brute::Middleware::TokenTracking.new(->(_env) { make_response })
75
+ env = build_env
76
+ middleware.call(env)
77
+ env[:metadata][:tokens][:total_input].should == 100
78
+ end
79
+
80
+ it "populates total_output tokens" do
81
+ middleware = Brute::Middleware::TokenTracking.new(->(_env) { make_response })
82
+ env = build_env
83
+ middleware.call(env)
84
+ env[:metadata][:tokens][:total_output].should == 50
85
+ end
86
+
87
+ it "populates total_reasoning tokens" do
88
+ middleware = Brute::Middleware::TokenTracking.new(->(_env) { make_response })
89
+ env = build_env
90
+ middleware.call(env)
91
+ env[:metadata][:tokens][:total_reasoning].should == 10
92
+ end
93
+
94
+ it "populates call_count" do
95
+ middleware = Brute::Middleware::TokenTracking.new(->(_env) { make_response })
96
+ env = build_env
97
+ middleware.call(env)
98
+ env[:metadata][:tokens][:call_count].should == 1
99
+ end
100
+
101
+ it "accumulates token counts across multiple calls" do
102
+ middleware = Brute::Middleware::TokenTracking.new(->(_env) { make_response })
103
+ env = build_env
104
+ middleware.call(env)
105
+ middleware.call(env)
106
+ env[:metadata][:tokens][:total_input].should == 200
107
+ end
108
+
109
+ it "handles a response without usage gracefully" do
110
+ no_usage = Object.new
111
+ middleware = Brute::Middleware::TokenTracking.new(->(_env) { no_usage })
112
+ env = build_env
113
+ middleware.call(env)
114
+ env[:metadata][:tokens].should.be.nil
115
+ end
116
+
117
+ it "handles a response where usage returns nil" do
118
+ nil_usage = Struct.new(:usage).new(nil)
119
+ middleware = Brute::Middleware::TokenTracking.new(->(_env) { nil_usage })
120
+ env = build_env
121
+ middleware.call(env)
122
+ env[:metadata][:tokens].should.be.nil
123
+ end
124
+ end
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
4
+ require "brute"
5
+
3
6
  module Brute
4
7
  module Middleware
5
8
  # Tracks per-tool error counts and total tool call count across LLM
@@ -10,7 +13,7 @@ module Brute
10
13
  # and counts failures and totals.
11
14
  #
12
15
  # When any tool exceeds max_failures, it sets env[:metadata][:tool_error_limit_reached]
13
- # so the orchestrator can decide to stop.
16
+ # so the agent loop can decide to stop.
14
17
  #
15
18
  # Also stores env[:metadata][:tool_calls] with the cumulative number of
16
19
  # tool invocations in the current session.
@@ -39,6 +42,15 @@ module Brute
39
42
  env[:metadata][:tool_errors] = @errors.dup
40
43
  env[:metadata][:tool_error_limit_reached] = @errors.any? { |_, c| c >= @max_failures }
41
44
 
45
+ if env[:metadata][:tool_error_limit_reached]
46
+ failed_tool, fail_count = @errors.max_by { |_, c| c }
47
+ env[:should_exit] ||= {
48
+ reason: "tool_error_limit_reached",
49
+ message: "Tool '#{failed_tool}' has failed #{fail_count} times (limit: #{@max_failures}). Stopping.",
50
+ source: "ToolErrorTracking",
51
+ }
52
+ end
53
+
42
54
  @app.call(env)
43
55
  end
44
56
 
@@ -50,3 +62,118 @@ module Brute
50
62
  end
51
63
  end
52
64
  end
65
+
66
+ test do
67
+ require_relative "../../../spec/support/mock_provider"
68
+ require_relative "../../../spec/support/mock_response"
69
+
70
+ def build_env(**overrides)
71
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
72
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
73
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
74
+ end
75
+
76
+ def make_middleware(app = nil)
77
+ app ||= ->(_env) { MockResponse.new(content: "tracked") }
78
+ Brute::Middleware::ToolErrorTracking.new(app, max_failures: 3)
79
+ end
80
+
81
+ it "passes the response through" do
82
+ response = MockResponse.new(content: "tracked")
83
+ app = ->(_env) { response }
84
+ result = make_middleware(app).call(build_env)
85
+ result.should == response
86
+ end
87
+
88
+ it "reports zero tool calls when tool_results is nil" do
89
+ env = build_env(tool_results: nil)
90
+ make_middleware.call(env)
91
+ env[:metadata][:tool_calls].should == 0
92
+ end
93
+
94
+ it "reports empty tool errors when tool_results is nil" do
95
+ env = build_env(tool_results: nil)
96
+ make_middleware.call(env)
97
+ env[:metadata][:tool_errors].should == {}
98
+ end
99
+
100
+ it "does not flag limit reached when tool_results is nil" do
101
+ env = build_env(tool_results: nil)
102
+ make_middleware.call(env)
103
+ env[:metadata][:tool_error_limit_reached].should.be.false
104
+ end
105
+
106
+ it "counts total tool calls from tool_results" do
107
+ results = [["fs_read", { content: "data" }], ["shell", { output: "ok" }], ["fs_write", { success: true }]]
108
+ env = build_env(tool_results: results)
109
+ make_middleware.call(env)
110
+ env[:metadata][:tool_calls].should == 3
111
+ end
112
+
113
+ it "counts per-tool errors from results with error key" do
114
+ results = [["fs_read", { error: "not found" }], ["fs_read", { error: "denied" }], ["shell", { output: "ok" }]]
115
+ env = build_env(tool_results: results)
116
+ make_middleware.call(env)
117
+ env[:metadata][:tool_errors].should == { "fs_read" => 2 }
118
+ end
119
+
120
+ it "sets tool_error_limit_reached when a tool hits max_failures" do
121
+ results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }], ["fs_read", { error: "3" }]]
122
+ env = build_env(tool_results: results)
123
+ make_middleware.call(env)
124
+ env[:metadata][:tool_error_limit_reached].should.be.true
125
+ end
126
+
127
+ it "does not flag below the threshold" do
128
+ results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }]]
129
+ env = build_env(tool_results: results)
130
+ make_middleware.call(env)
131
+ env[:metadata][:tool_error_limit_reached].should.be.false
132
+ end
133
+
134
+ it "accumulates counts across multiple calls" do
135
+ mw = make_middleware
136
+ mw.call(build_env(tool_results: [["fs_read", { error: "fail" }]]))
137
+ env2 = build_env(tool_results: [["fs_read", { error: "again" }], ["shell", { output: "ok" }]])
138
+ mw.call(env2)
139
+ env2[:metadata][:tool_calls].should == 3
140
+ end
141
+
142
+ it "clears counters on reset!" do
143
+ mw = make_middleware
144
+ mw.call(build_env(tool_results: [["fs_read", { error: "fail" }]]))
145
+ mw.reset!
146
+ env2 = build_env(tool_results: nil)
147
+ mw.call(env2)
148
+ env2[:metadata][:tool_calls].should == 0
149
+ end
150
+
151
+ it "sets should_exit reason when error limit reached" do
152
+ results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }], ["fs_read", { error: "3" }]]
153
+ env = build_env(tool_results: results)
154
+ make_middleware.call(env)
155
+ env[:should_exit][:reason].should == "tool_error_limit_reached"
156
+ end
157
+
158
+ it "sets should_exit source to ToolErrorTracking" do
159
+ results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }], ["fs_read", { error: "3" }]]
160
+ env = build_env(tool_results: results)
161
+ make_middleware.call(env)
162
+ env[:should_exit][:source].should == "ToolErrorTracking"
163
+ end
164
+
165
+ it "does not set should_exit below the threshold" do
166
+ results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }]]
167
+ env = build_env(tool_results: results)
168
+ make_middleware.call(env)
169
+ env[:should_exit].should.be.nil
170
+ end
171
+
172
+ it "does not overwrite should_exit if already set" do
173
+ results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }], ["fs_read", { error: "3" }]]
174
+ existing = { reason: "doom_loop_detected", message: "loop", source: "DoomLoopDetection" }
175
+ env = build_env(tool_results: results, should_exit: existing)
176
+ make_middleware.call(env)
177
+ env[:should_exit][:reason].should == "doom_loop_detected"
178
+ end
179
+ end
@@ -1,24 +1,27 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
4
+ require "brute"
5
+
3
6
  module Brute
4
7
  module Middleware
5
8
  # Guards against tool-only LLM responses where the assistant message
6
9
  # is dropped from the context buffer.
7
10
  #
8
11
  # When the LLM responds with only tool_use blocks (no text), llm.rb's
9
- # response adapter produces empty choices. Context#talk appends nil,
10
- # BufferNilGuard strips it, and the assistant message carrying tool_use
11
- # blocks is lost. This causes "unexpected tool_use_id" on the next call
12
- # because tool_result references a tool_use that's missing from the buffer.
12
+ # response adapter produces empty choices. The assistant message carrying
13
+ # tool_use blocks may be lost. This causes "unexpected tool_use_id" on
14
+ # the next call because tool_result references a tool_use that's missing
15
+ # from the message history.
13
16
  #
14
17
  # This middleware runs post-call and ensures every pending tool_use ID
15
- # is covered by an assistant message in the buffer. It handles three
18
+ # is covered by an assistant message in env[:messages]. It handles three
16
19
  # cases:
17
20
  #
18
- # 1. ctx.functions is non-empty and the assistant message exists → no-op
19
- # 2. ctx.functions is non-empty but the assistant message is missing
21
+ # 1. pending_functions is non-empty and the assistant message exists → no-op
22
+ # 2. pending_functions is non-empty but the assistant message is missing
20
23
  # (or has different IDs) → inject synthetic message
21
- # 3. ctx.functions is empty (nil-choice bug) but the stream recorded
24
+ # 3. pending_functions is empty (nil-choice bug) but the stream recorded
22
25
  # tool calls → inject synthetic message using stream metadata
23
26
  #
24
27
  class ToolUseGuard
@@ -29,32 +32,30 @@ module Brute
29
32
  def call(env)
30
33
  response = @app.call(env)
31
34
 
32
- ctx = env[:context]
33
-
34
- # Collect pending tool data from ctx.functions (primary) or the
35
- # stream's recorded metadata (fallback for nil-choice bug).
36
- tool_data = collect_tool_data(ctx, env)
35
+ # Collect pending tool data from env[:pending_functions] (primary)
36
+ # or the stream's recorded metadata (fallback for nil-choice bug).
37
+ tool_data = collect_tool_data(env)
37
38
  return response if tool_data.empty?
38
39
 
39
40
  # Find all tool_use IDs already covered by assistant messages.
40
- covered_ids = covered_tool_ids(ctx)
41
+ covered_ids = covered_tool_ids(env[:messages])
41
42
 
42
43
  # Inject a synthetic assistant message for any uncovered tool calls.
43
44
  uncovered = tool_data.reject { |td| covered_ids.include?(td[:id]) }
44
- inject_synthetic!(ctx, uncovered) unless uncovered.empty?
45
+ inject_synthetic!(env[:messages], uncovered) unless uncovered.empty?
45
46
 
46
47
  response
47
48
  end
48
49
 
49
50
  private
50
51
 
51
- def collect_tool_data(ctx, env)
52
- functions = ctx.functions
52
+ def collect_tool_data(env)
53
+ functions = env[:pending_functions]
53
54
  if functions && !functions.empty?
54
55
  functions.map { |fn| { id: fn.id, name: fn.name, arguments: fn.arguments } }
55
56
  elsif env[:streaming]
56
- stream = resolve_stream(ctx)
57
- if stream
57
+ stream = env[:stream]
58
+ if stream&.respond_to?(:pending_tool_calls)
58
59
  data = stream.pending_tool_calls.dup
59
60
  stream.clear_pending_tool_calls!
60
61
  data
@@ -66,19 +67,14 @@ module Brute
66
67
  end
67
68
  end
68
69
 
69
- def resolve_stream(ctx)
70
- stream = ctx.instance_variable_get(:@params)&.dig(:stream)
71
- stream if stream.respond_to?(:pending_tool_calls)
72
- end
73
-
74
- def covered_tool_ids(ctx)
75
- ctx.messages.to_a
70
+ def covered_tool_ids(messages)
71
+ messages
76
72
  .select { |m| m.role.to_s == "assistant" && m.tool_call? }
77
73
  .flat_map { |m| (m.extra.original_tool_calls || []).map { |tc| tc["id"] } }
78
74
  .to_set
79
75
  end
80
76
 
81
- def inject_synthetic!(ctx, uncovered)
77
+ def inject_synthetic!(messages, uncovered)
82
78
  tool_calls = uncovered.map do |td|
83
79
  LLM::Object.from(id: td[:id], name: td[:name], arguments: td[:arguments])
84
80
  end
@@ -90,8 +86,48 @@ module Brute
90
86
  tool_calls: tool_calls,
91
87
  original_tool_calls: original_tool_calls,
92
88
  })
93
- ctx.messages.concat([synthetic])
89
+ messages << synthetic
94
90
  end
95
91
  end
96
92
  end
97
93
  end
94
+
95
+ test do
96
+ require_relative "../../../spec/support/mock_provider"
97
+ require_relative "../../../spec/support/mock_response"
98
+
99
+ def build_env(**overrides)
100
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
101
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
102
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
103
+ end
104
+
105
+ it "passes the response through when there are no pending functions" do
106
+ response = MockResponse.new(content: "no tools")
107
+ inner_app = ->(_env) { response }
108
+ middleware = Brute::Middleware::ToolUseGuard.new(inner_app)
109
+ result = middleware.call(build_env(pending_functions: []))
110
+ result.should == response
111
+ end
112
+
113
+ it "injects a synthetic assistant message when tool calls exist but assistant is missing" do
114
+ fn = Struct.new(:id, :name, :arguments, keyword_init: true)
115
+ .new(id: "toolu_1", name: "fs_read", arguments: { "path" => "test.rb" })
116
+ response = MockResponse.new(content: "")
117
+ inner_app = ->(_env) { response }
118
+ middleware = Brute::Middleware::ToolUseGuard.new(inner_app)
119
+ env = build_env(messages: [], pending_functions: [fn])
120
+ lambda { middleware.call(env) }.should.not.raise
121
+ end
122
+
123
+ it "creates one assistant message for uncovered tool calls" do
124
+ fn = Struct.new(:id, :name, :arguments, keyword_init: true)
125
+ .new(id: "toolu_1", name: "fs_read", arguments: { "path" => "test.rb" })
126
+ response = MockResponse.new(content: "")
127
+ inner_app = ->(_env) { response }
128
+ middleware = Brute::Middleware::ToolUseGuard.new(inner_app)
129
+ env = build_env(messages: [], pending_functions: [fn])
130
+ middleware.call(env)
131
+ env[:messages].select { |m| m.role.to_s == "assistant" }.size.should == 1
132
+ end
133
+ end