brute 0.4.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/brute/agent.rb +14 -0
- data/lib/brute/diff.rb +24 -0
- data/lib/brute/loop/agent_stream.rb +118 -0
- data/lib/brute/loop/agent_turn.rb +520 -0
- data/lib/brute/{compactor.rb → loop/compactor.rb} +2 -0
- data/lib/brute/{doom_loop.rb → loop/doom_loop.rb} +2 -0
- data/lib/brute/loop/step.rb +332 -0
- data/lib/brute/loop/tool_call_step.rb +90 -0
- data/lib/brute/middleware/compaction_check.rb +70 -23
- data/lib/brute/middleware/doom_loop_detection.rb +110 -7
- data/lib/brute/middleware/llm_call.rb +88 -1
- data/lib/brute/middleware/message_tracking.rb +140 -10
- data/lib/brute/middleware/otel/span.rb +32 -2
- data/lib/brute/middleware/otel/token_usage.rb +38 -0
- data/lib/brute/middleware/otel/tool_calls.rb +30 -1
- data/lib/brute/middleware/otel/tool_results.rb +29 -1
- data/lib/brute/middleware/otel.rb +5 -0
- data/lib/brute/middleware/reasoning_normalizer.rb +94 -0
- data/lib/brute/middleware/retry.rb +113 -1
- data/lib/brute/middleware/session_persistence.rb +46 -3
- data/lib/brute/middleware/token_tracking.rb +78 -0
- data/lib/brute/middleware/tool_error_tracking.rb +128 -1
- data/lib/brute/middleware/tool_use_guard.rb +64 -28
- data/lib/brute/middleware/tracing.rb +63 -2
- data/lib/brute/middleware.rb +18 -0
- data/lib/brute/orchestrator/turn.rb +105 -0
- data/lib/brute/patches/buffer_nil_guard.rb +5 -0
- data/lib/brute/pipeline.rb +86 -7
- data/lib/brute/prompts/build_switch.rb +29 -0
- data/lib/brute/prompts/environment.rb +43 -0
- data/lib/brute/prompts/identity.rb +29 -0
- data/lib/brute/prompts/instructions.rb +21 -0
- data/lib/brute/prompts/max_steps.rb +25 -0
- data/lib/brute/prompts/plan_reminder.rb +25 -0
- data/lib/brute/prompts/skills.rb +13 -0
- data/lib/brute/prompts.rb +28 -0
- data/lib/brute/providers/ollama.rb +135 -0
- data/lib/brute/providers/opencode_go.rb +5 -0
- data/lib/brute/providers/opencode_zen.rb +7 -2
- data/lib/brute/providers/shell.rb +2 -2
- data/lib/brute/providers/shell_response.rb +7 -2
- data/lib/brute/providers.rb +62 -0
- data/lib/brute/queue/base_queue.rb +222 -0
- data/lib/brute/{file_mutation_queue.rb → queue/file_mutation_queue.rb} +28 -26
- data/lib/brute/queue/parallel_queue.rb +66 -0
- data/lib/brute/queue/sequential_queue.rb +63 -0
- data/lib/brute/{message_store.rb → store/message_store.rb} +155 -62
- data/lib/brute/store/session.rb +106 -0
- data/lib/brute/{snapshot_store.rb → store/snapshot_store.rb} +2 -0
- data/lib/brute/{todo_store.rb → store/todo_store.rb} +2 -0
- data/lib/brute/system_prompt.rb +101 -0
- data/lib/brute/tools/delegate.rb +59 -0
- data/lib/brute/tools/fs_patch.rb +54 -2
- data/lib/brute/tools/fs_read.rb +5 -0
- data/lib/brute/tools/fs_remove.rb +7 -2
- data/lib/brute/tools/fs_search.rb +5 -0
- data/lib/brute/tools/fs_undo.rb +7 -2
- data/lib/brute/tools/fs_write.rb +40 -2
- data/lib/brute/tools/net_fetch.rb +5 -0
- data/lib/brute/tools/question.rb +5 -0
- data/lib/brute/tools/shell.rb +5 -0
- data/lib/brute/tools/todo_read.rb +6 -1
- data/lib/brute/tools/todo_write.rb +6 -1
- data/lib/brute/tools.rb +31 -0
- data/lib/brute/version.rb +1 -1
- data/lib/brute.rb +40 -204
- metadata +31 -20
- data/lib/brute/agent_stream.rb +0 -63
- data/lib/brute/hooks.rb +0 -84
- data/lib/brute/orchestrator.rb +0 -391
- data/lib/brute/session.rb +0 -161
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
3
6
|
module Brute
|
|
4
7
|
module Middleware
|
|
5
8
|
# Handles reasoning/thinking content across model switches.
|
|
@@ -96,3 +99,94 @@ module Brute
|
|
|
96
99
|
end
|
|
97
100
|
end
|
|
98
101
|
end
|
|
102
|
+
|
|
103
|
+
test do
|
|
104
|
+
require_relative "../../../spec/support/mock_provider"
|
|
105
|
+
require_relative "../../../spec/support/mock_response"
|
|
106
|
+
|
|
107
|
+
def build_env(**overrides)
|
|
108
|
+
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
109
|
+
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
110
|
+
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def make_provider(type_name)
|
|
114
|
+
klass = Class.new do
|
|
115
|
+
define_method(:name) { :mock }
|
|
116
|
+
define_method(:default_model) { "mock-model" }
|
|
117
|
+
define_method(:user_role) { :user }
|
|
118
|
+
define_method(:system_role) { :system }
|
|
119
|
+
define_method(:assistant_role) { :assistant }
|
|
120
|
+
define_method(:tool_role) { :tool }
|
|
121
|
+
define_method(:tracer) { nil }
|
|
122
|
+
define_method(:tracer=) { |*| }
|
|
123
|
+
define_method(:complete) { |*_args, **_kw| MockResponse.new(content: "ok") }
|
|
124
|
+
end
|
|
125
|
+
klass.define_method(:class) do
|
|
126
|
+
c = super()
|
|
127
|
+
name_str = "LLM::#{type_name}"
|
|
128
|
+
c.define_singleton_method(:name) { name_str }
|
|
129
|
+
c
|
|
130
|
+
end
|
|
131
|
+
klass.new
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
inner_app = ->(_env) { MockResponse.new(content: "reasoned response") }
|
|
135
|
+
|
|
136
|
+
it "injects thinking param for Anthropic with budget_tokens" do
|
|
137
|
+
provider = make_provider("Anthropic")
|
|
138
|
+
middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "claude-4", budget_tokens: 8000, enabled: true)
|
|
139
|
+
env = build_env(provider: provider, params: {})
|
|
140
|
+
middleware.call(env)
|
|
141
|
+
env[:params][:thinking].should == { type: "enabled", budget_tokens: 8000 }
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
it "does not inject thinking param for Anthropic without budget_tokens" do
|
|
145
|
+
provider = make_provider("Anthropic")
|
|
146
|
+
middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "claude-4", enabled: true)
|
|
147
|
+
env = build_env(provider: provider, params: {})
|
|
148
|
+
middleware.call(env)
|
|
149
|
+
env[:params][:thinking].should.be.nil
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
it "injects reasoning_effort for OpenAI" do
|
|
153
|
+
provider = make_provider("OpenAI")
|
|
154
|
+
middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "o3", effort: :high, enabled: true)
|
|
155
|
+
env = build_env(provider: provider, params: {})
|
|
156
|
+
middleware.call(env)
|
|
157
|
+
env[:params][:reasoning_effort].should == "high"
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
it "maps low effort correctly for OpenAI" do
|
|
161
|
+
provider = make_provider("OpenAI")
|
|
162
|
+
middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "o3", effort: :low, enabled: true)
|
|
163
|
+
env = build_env(provider: provider, params: {})
|
|
164
|
+
middleware.call(env)
|
|
165
|
+
env[:params][:reasoning_effort].should == "low"
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
it "does not inject params for unknown provider" do
|
|
169
|
+
provider = make_provider("Mistral")
|
|
170
|
+
middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "mistral-large", enabled: true)
|
|
171
|
+
env = build_env(provider: provider, params: {})
|
|
172
|
+
middleware.call(env)
|
|
173
|
+
env[:params].should == {}
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
it "does not inject params when disabled" do
|
|
177
|
+
provider = make_provider("Anthropic")
|
|
178
|
+
middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "claude-4", budget_tokens: 8000, enabled: false)
|
|
179
|
+
env = build_env(provider: provider, params: {})
|
|
180
|
+
middleware.call(env)
|
|
181
|
+
env[:params].should == {}
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
it "allows model_id to be updated mid-session" do
|
|
185
|
+
middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "old", enabled: true)
|
|
186
|
+
middleware.model_id = "new"
|
|
187
|
+
provider = make_provider("OpenAI")
|
|
188
|
+
env = build_env(provider: provider, params: {})
|
|
189
|
+
middleware.call(env)
|
|
190
|
+
env[:params][:reasoning_effort].should.not.be.nil
|
|
191
|
+
end
|
|
192
|
+
end
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
3
6
|
module Brute
|
|
4
7
|
module Middleware
|
|
5
8
|
# Retries the inner call on transient LLM errors with exponential backoff.
|
|
@@ -9,7 +12,7 @@ module Brute
|
|
|
9
12
|
# propagate immediately.
|
|
10
13
|
#
|
|
11
14
|
# Unlike forgecode's separate retry.rs, this middleware wraps the LLM call
|
|
12
|
-
# directly — it sees the error and retries without the
|
|
15
|
+
# directly — it sees the error and retries without the agent loop knowing.
|
|
13
16
|
#
|
|
14
17
|
class Retry < Base
|
|
15
18
|
DEFAULT_MAX_ATTEMPTS = 3
|
|
@@ -43,3 +46,112 @@ module Brute
|
|
|
43
46
|
end
|
|
44
47
|
end
|
|
45
48
|
end
|
|
49
|
+
|
|
50
|
+
test do
|
|
51
|
+
require_relative "../../../spec/support/mock_provider"
|
|
52
|
+
require_relative "../../../spec/support/mock_response"
|
|
53
|
+
|
|
54
|
+
def build_env(**overrides)
|
|
55
|
+
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
56
|
+
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
57
|
+
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def mock_inner_app(response:)
|
|
61
|
+
calls = []
|
|
62
|
+
app = ->(env) { calls << env; response }
|
|
63
|
+
[app, calls]
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def flaky_inner_app(error_class, fail_count:, response:)
|
|
67
|
+
attempt = 0
|
|
68
|
+
->(env) { attempt += 1; raise error_class, "transient" if attempt <= fail_count; response }
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
def no_sleep_retry(*args, **kwargs)
|
|
72
|
+
mw = Brute::Middleware::Retry.new(*args, **kwargs)
|
|
73
|
+
mw.define_singleton_method(:sleep) { |_| }
|
|
74
|
+
mw
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
it "returns the response on first successful call" do
|
|
78
|
+
response = MockResponse.new(content: "success")
|
|
79
|
+
app, calls = mock_inner_app(response: response)
|
|
80
|
+
middleware = Brute::Middleware::Retry.new(app)
|
|
81
|
+
result = middleware.call(build_env)
|
|
82
|
+
result.should == response
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
it "calls inner app exactly once on success" do
|
|
86
|
+
response = MockResponse.new(content: "success")
|
|
87
|
+
app, calls = mock_inner_app(response: response)
|
|
88
|
+
Brute::Middleware::Retry.new(app).call(build_env)
|
|
89
|
+
calls.size.should == 1
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
it "retries on LLM::RateLimitError and succeeds" do
|
|
93
|
+
response = MockResponse.new(content: "success")
|
|
94
|
+
app = flaky_inner_app(LLM::RateLimitError, fail_count: 2, response: response)
|
|
95
|
+
middleware = no_sleep_retry(app, max_attempts: 3, base_delay: 2)
|
|
96
|
+
env = build_env
|
|
97
|
+
result = middleware.call(env)
|
|
98
|
+
result.should == response
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
it "records retry_attempt in metadata after retries" do
|
|
102
|
+
response = MockResponse.new(content: "success")
|
|
103
|
+
app = flaky_inner_app(LLM::RateLimitError, fail_count: 2, response: response)
|
|
104
|
+
middleware = no_sleep_retry(app, max_attempts: 3, base_delay: 2)
|
|
105
|
+
env = build_env
|
|
106
|
+
middleware.call(env)
|
|
107
|
+
env[:metadata][:retry_attempt].should == 2
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
it "retries on LLM::ServerError and succeeds" do
|
|
111
|
+
response = MockResponse.new(content: "success")
|
|
112
|
+
app = flaky_inner_app(LLM::ServerError, fail_count: 1, response: response)
|
|
113
|
+
middleware = no_sleep_retry(app, max_attempts: 3, base_delay: 2)
|
|
114
|
+
result = middleware.call(build_env)
|
|
115
|
+
result.should == response
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
it "re-raises after exhausting all attempts" do
|
|
119
|
+
app = ->(_env) { raise LLM::RateLimitError, "rate limited" }
|
|
120
|
+
middleware = no_sleep_retry(app, max_attempts: 3, base_delay: 2)
|
|
121
|
+
lambda { middleware.call(build_env) }.should.raise(LLM::RateLimitError)
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
it "does not retry non-retryable errors" do
|
|
125
|
+
call_count = 0
|
|
126
|
+
app = ->(_env) { call_count += 1; raise ArgumentError, "bad input" }
|
|
127
|
+
middleware = Brute::Middleware::Retry.new(app)
|
|
128
|
+
lambda { middleware.call(build_env) }.should.raise(ArgumentError)
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
it "only calls inner app once for non-retryable errors" do
|
|
132
|
+
call_count = 0
|
|
133
|
+
app = ->(_env) { call_count += 1; raise ArgumentError, "bad input" }
|
|
134
|
+
middleware = Brute::Middleware::Retry.new(app)
|
|
135
|
+
begin; middleware.call(build_env); rescue ArgumentError; end
|
|
136
|
+
call_count.should == 1
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
it "records retry_delay in metadata" do
|
|
140
|
+
response = MockResponse.new(content: "success")
|
|
141
|
+
app = flaky_inner_app(LLM::RateLimitError, fail_count: 1, response: response)
|
|
142
|
+
middleware = no_sleep_retry(app, max_attempts: 3, base_delay: 3)
|
|
143
|
+
env = build_env
|
|
144
|
+
middleware.call(env)
|
|
145
|
+
env[:metadata][:retry_delay].should == 3
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
it "tracks sleep delays for exponential backoff" do
|
|
149
|
+
response = MockResponse.new(content: "success")
|
|
150
|
+
app = flaky_inner_app(LLM::RateLimitError, fail_count: 2, response: response)
|
|
151
|
+
delays = []
|
|
152
|
+
mw = Brute::Middleware::Retry.new(app, max_attempts: 3, base_delay: 2)
|
|
153
|
+
mw.define_singleton_method(:sleep) { |d| delays << d }
|
|
154
|
+
mw.call(build_env)
|
|
155
|
+
delays.should == [2, 4]
|
|
156
|
+
end
|
|
157
|
+
end
|
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
3
6
|
module Brute
|
|
4
7
|
module Middleware
|
|
5
8
|
# Saves the conversation to disk after each LLM call.
|
|
6
9
|
#
|
|
7
|
-
# Runs POST-call:
|
|
8
|
-
# a broken session save should never crash
|
|
10
|
+
# Runs POST-call: serializes env[:messages] via Session#save_messages.
|
|
11
|
+
# Failures are non-fatal — a broken session save should never crash
|
|
12
|
+
# the agent loop.
|
|
9
13
|
#
|
|
10
14
|
class SessionPersistence < Base
|
|
11
15
|
def initialize(app, session:)
|
|
@@ -17,7 +21,7 @@ module Brute
|
|
|
17
21
|
response = @app.call(env)
|
|
18
22
|
|
|
19
23
|
begin
|
|
20
|
-
@session.
|
|
24
|
+
@session.save_messages(env[:messages])
|
|
21
25
|
rescue => e
|
|
22
26
|
warn "[brute] Session save failed: #{e.message}"
|
|
23
27
|
end
|
|
@@ -27,3 +31,42 @@ module Brute
|
|
|
27
31
|
end
|
|
28
32
|
end
|
|
29
33
|
end
|
|
34
|
+
|
|
35
|
+
test do
|
|
36
|
+
require_relative "../../../spec/support/mock_provider"
|
|
37
|
+
require_relative "../../../spec/support/mock_response"
|
|
38
|
+
|
|
39
|
+
def build_env(**overrides)
|
|
40
|
+
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
41
|
+
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
42
|
+
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
it "passes the response through unchanged" do
|
|
46
|
+
response = MockResponse.new(content: "saved response")
|
|
47
|
+
session = Struct.new(:saved) { def save_messages(m); self.saved = m; end }.new
|
|
48
|
+
inner_app = ->(_env) { response }
|
|
49
|
+
middleware = Brute::Middleware::SessionPersistence.new(inner_app, session: session)
|
|
50
|
+
result = middleware.call(build_env)
|
|
51
|
+
result.should == response
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
it "calls session.save_messages with env messages" do
|
|
55
|
+
response = MockResponse.new(content: "saved response")
|
|
56
|
+
session = Struct.new(:saved) { def save_messages(m); self.saved = m; end }.new
|
|
57
|
+
inner_app = ->(_env) { response }
|
|
58
|
+
middleware = Brute::Middleware::SessionPersistence.new(inner_app, session: session)
|
|
59
|
+
messages = [LLM::Message.new(:user, "hello")]
|
|
60
|
+
middleware.call(build_env(messages: messages))
|
|
61
|
+
session.saved.should == messages
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
it "does not propagate session save failures" do
|
|
65
|
+
response = MockResponse.new(content: "saved response")
|
|
66
|
+
session = Object.new
|
|
67
|
+
session.define_singleton_method(:save_messages) { |_| raise RuntimeError, "disk full" }
|
|
68
|
+
inner_app = ->(_env) { response }
|
|
69
|
+
middleware = Brute::Middleware::SessionPersistence.new(inner_app, session: session)
|
|
70
|
+
lambda { middleware.call(build_env) }.should.not.raise
|
|
71
|
+
end
|
|
72
|
+
end
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
3
6
|
module Brute
|
|
4
7
|
module Middleware
|
|
5
8
|
# Tracks cumulative token usage across all LLM calls in a session.
|
|
@@ -44,3 +47,78 @@ module Brute
|
|
|
44
47
|
end
|
|
45
48
|
end
|
|
46
49
|
end
|
|
50
|
+
|
|
51
|
+
test do
|
|
52
|
+
require_relative "../../../spec/support/mock_provider"
|
|
53
|
+
require_relative "../../../spec/support/mock_response"
|
|
54
|
+
|
|
55
|
+
def build_env(**overrides)
|
|
56
|
+
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
57
|
+
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
58
|
+
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def make_response
|
|
62
|
+
MockResponse.new(content: "hello",
|
|
63
|
+
usage: LLM::Usage.new(input_tokens: 100, output_tokens: 50, reasoning_tokens: 10, total_tokens: 160))
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
it "passes the response through unchanged" do
|
|
67
|
+
response = make_response
|
|
68
|
+
middleware = Brute::Middleware::TokenTracking.new(->(_env) { response })
|
|
69
|
+
result = middleware.call(build_env)
|
|
70
|
+
result.should == response
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
it "populates total_input tokens" do
|
|
74
|
+
middleware = Brute::Middleware::TokenTracking.new(->(_env) { make_response })
|
|
75
|
+
env = build_env
|
|
76
|
+
middleware.call(env)
|
|
77
|
+
env[:metadata][:tokens][:total_input].should == 100
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
it "populates total_output tokens" do
|
|
81
|
+
middleware = Brute::Middleware::TokenTracking.new(->(_env) { make_response })
|
|
82
|
+
env = build_env
|
|
83
|
+
middleware.call(env)
|
|
84
|
+
env[:metadata][:tokens][:total_output].should == 50
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
it "populates total_reasoning tokens" do
|
|
88
|
+
middleware = Brute::Middleware::TokenTracking.new(->(_env) { make_response })
|
|
89
|
+
env = build_env
|
|
90
|
+
middleware.call(env)
|
|
91
|
+
env[:metadata][:tokens][:total_reasoning].should == 10
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
it "populates call_count" do
|
|
95
|
+
middleware = Brute::Middleware::TokenTracking.new(->(_env) { make_response })
|
|
96
|
+
env = build_env
|
|
97
|
+
middleware.call(env)
|
|
98
|
+
env[:metadata][:tokens][:call_count].should == 1
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
it "accumulates token counts across multiple calls" do
|
|
102
|
+
middleware = Brute::Middleware::TokenTracking.new(->(_env) { make_response })
|
|
103
|
+
env = build_env
|
|
104
|
+
middleware.call(env)
|
|
105
|
+
middleware.call(env)
|
|
106
|
+
env[:metadata][:tokens][:total_input].should == 200
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
it "handles a response without usage gracefully" do
|
|
110
|
+
no_usage = Object.new
|
|
111
|
+
middleware = Brute::Middleware::TokenTracking.new(->(_env) { no_usage })
|
|
112
|
+
env = build_env
|
|
113
|
+
middleware.call(env)
|
|
114
|
+
env[:metadata][:tokens].should.be.nil
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
it "handles a response where usage returns nil" do
|
|
118
|
+
nil_usage = Struct.new(:usage).new(nil)
|
|
119
|
+
middleware = Brute::Middleware::TokenTracking.new(->(_env) { nil_usage })
|
|
120
|
+
env = build_env
|
|
121
|
+
middleware.call(env)
|
|
122
|
+
env[:metadata][:tokens].should.be.nil
|
|
123
|
+
end
|
|
124
|
+
end
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
3
6
|
module Brute
|
|
4
7
|
module Middleware
|
|
5
8
|
# Tracks per-tool error counts and total tool call count across LLM
|
|
@@ -10,7 +13,7 @@ module Brute
|
|
|
10
13
|
# and counts failures and totals.
|
|
11
14
|
#
|
|
12
15
|
# When any tool exceeds max_failures, it sets env[:metadata][:tool_error_limit_reached]
|
|
13
|
-
# so the
|
|
16
|
+
# so the agent loop can decide to stop.
|
|
14
17
|
#
|
|
15
18
|
# Also stores env[:metadata][:tool_calls] with the cumulative number of
|
|
16
19
|
# tool invocations in the current session.
|
|
@@ -39,6 +42,15 @@ module Brute
|
|
|
39
42
|
env[:metadata][:tool_errors] = @errors.dup
|
|
40
43
|
env[:metadata][:tool_error_limit_reached] = @errors.any? { |_, c| c >= @max_failures }
|
|
41
44
|
|
|
45
|
+
if env[:metadata][:tool_error_limit_reached]
|
|
46
|
+
failed_tool, fail_count = @errors.max_by { |_, c| c }
|
|
47
|
+
env[:should_exit] ||= {
|
|
48
|
+
reason: "tool_error_limit_reached",
|
|
49
|
+
message: "Tool '#{failed_tool}' has failed #{fail_count} times (limit: #{@max_failures}). Stopping.",
|
|
50
|
+
source: "ToolErrorTracking",
|
|
51
|
+
}
|
|
52
|
+
end
|
|
53
|
+
|
|
42
54
|
@app.call(env)
|
|
43
55
|
end
|
|
44
56
|
|
|
@@ -50,3 +62,118 @@ module Brute
|
|
|
50
62
|
end
|
|
51
63
|
end
|
|
52
64
|
end
|
|
65
|
+
|
|
66
|
+
test do
|
|
67
|
+
require_relative "../../../spec/support/mock_provider"
|
|
68
|
+
require_relative "../../../spec/support/mock_response"
|
|
69
|
+
|
|
70
|
+
def build_env(**overrides)
|
|
71
|
+
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
72
|
+
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
73
|
+
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def make_middleware(app = nil)
|
|
77
|
+
app ||= ->(_env) { MockResponse.new(content: "tracked") }
|
|
78
|
+
Brute::Middleware::ToolErrorTracking.new(app, max_failures: 3)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
it "passes the response through" do
|
|
82
|
+
response = MockResponse.new(content: "tracked")
|
|
83
|
+
app = ->(_env) { response }
|
|
84
|
+
result = make_middleware(app).call(build_env)
|
|
85
|
+
result.should == response
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
it "reports zero tool calls when tool_results is nil" do
|
|
89
|
+
env = build_env(tool_results: nil)
|
|
90
|
+
make_middleware.call(env)
|
|
91
|
+
env[:metadata][:tool_calls].should == 0
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
it "reports empty tool errors when tool_results is nil" do
|
|
95
|
+
env = build_env(tool_results: nil)
|
|
96
|
+
make_middleware.call(env)
|
|
97
|
+
env[:metadata][:tool_errors].should == {}
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
it "does not flag limit reached when tool_results is nil" do
|
|
101
|
+
env = build_env(tool_results: nil)
|
|
102
|
+
make_middleware.call(env)
|
|
103
|
+
env[:metadata][:tool_error_limit_reached].should.be.false
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
it "counts total tool calls from tool_results" do
|
|
107
|
+
results = [["fs_read", { content: "data" }], ["shell", { output: "ok" }], ["fs_write", { success: true }]]
|
|
108
|
+
env = build_env(tool_results: results)
|
|
109
|
+
make_middleware.call(env)
|
|
110
|
+
env[:metadata][:tool_calls].should == 3
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
it "counts per-tool errors from results with error key" do
|
|
114
|
+
results = [["fs_read", { error: "not found" }], ["fs_read", { error: "denied" }], ["shell", { output: "ok" }]]
|
|
115
|
+
env = build_env(tool_results: results)
|
|
116
|
+
make_middleware.call(env)
|
|
117
|
+
env[:metadata][:tool_errors].should == { "fs_read" => 2 }
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
it "sets tool_error_limit_reached when a tool hits max_failures" do
|
|
121
|
+
results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }], ["fs_read", { error: "3" }]]
|
|
122
|
+
env = build_env(tool_results: results)
|
|
123
|
+
make_middleware.call(env)
|
|
124
|
+
env[:metadata][:tool_error_limit_reached].should.be.true
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
it "does not flag below the threshold" do
|
|
128
|
+
results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }]]
|
|
129
|
+
env = build_env(tool_results: results)
|
|
130
|
+
make_middleware.call(env)
|
|
131
|
+
env[:metadata][:tool_error_limit_reached].should.be.false
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
it "accumulates counts across multiple calls" do
|
|
135
|
+
mw = make_middleware
|
|
136
|
+
mw.call(build_env(tool_results: [["fs_read", { error: "fail" }]]))
|
|
137
|
+
env2 = build_env(tool_results: [["fs_read", { error: "again" }], ["shell", { output: "ok" }]])
|
|
138
|
+
mw.call(env2)
|
|
139
|
+
env2[:metadata][:tool_calls].should == 3
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
it "clears counters on reset!" do
|
|
143
|
+
mw = make_middleware
|
|
144
|
+
mw.call(build_env(tool_results: [["fs_read", { error: "fail" }]]))
|
|
145
|
+
mw.reset!
|
|
146
|
+
env2 = build_env(tool_results: nil)
|
|
147
|
+
mw.call(env2)
|
|
148
|
+
env2[:metadata][:tool_calls].should == 0
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
it "sets should_exit reason when error limit reached" do
|
|
152
|
+
results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }], ["fs_read", { error: "3" }]]
|
|
153
|
+
env = build_env(tool_results: results)
|
|
154
|
+
make_middleware.call(env)
|
|
155
|
+
env[:should_exit][:reason].should == "tool_error_limit_reached"
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
it "sets should_exit source to ToolErrorTracking" do
|
|
159
|
+
results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }], ["fs_read", { error: "3" }]]
|
|
160
|
+
env = build_env(tool_results: results)
|
|
161
|
+
make_middleware.call(env)
|
|
162
|
+
env[:should_exit][:source].should == "ToolErrorTracking"
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
it "does not set should_exit below the threshold" do
|
|
166
|
+
results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }]]
|
|
167
|
+
env = build_env(tool_results: results)
|
|
168
|
+
make_middleware.call(env)
|
|
169
|
+
env[:should_exit].should.be.nil
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
it "does not overwrite should_exit if already set" do
|
|
173
|
+
results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }], ["fs_read", { error: "3" }]]
|
|
174
|
+
existing = { reason: "doom_loop_detected", message: "loop", source: "DoomLoopDetection" }
|
|
175
|
+
env = build_env(tool_results: results, should_exit: existing)
|
|
176
|
+
make_middleware.call(env)
|
|
177
|
+
env[:should_exit][:reason].should == "doom_loop_detected"
|
|
178
|
+
end
|
|
179
|
+
end
|
|
@@ -1,24 +1,27 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
3
6
|
module Brute
|
|
4
7
|
module Middleware
|
|
5
8
|
# Guards against tool-only LLM responses where the assistant message
|
|
6
9
|
# is dropped from the context buffer.
|
|
7
10
|
#
|
|
8
11
|
# When the LLM responds with only tool_use blocks (no text), llm.rb's
|
|
9
|
-
# response adapter produces empty choices.
|
|
10
|
-
#
|
|
11
|
-
#
|
|
12
|
-
#
|
|
12
|
+
# response adapter produces empty choices. The assistant message carrying
|
|
13
|
+
# tool_use blocks may be lost. This causes "unexpected tool_use_id" on
|
|
14
|
+
# the next call because tool_result references a tool_use that's missing
|
|
15
|
+
# from the message history.
|
|
13
16
|
#
|
|
14
17
|
# This middleware runs post-call and ensures every pending tool_use ID
|
|
15
|
-
# is covered by an assistant message in
|
|
18
|
+
# is covered by an assistant message in env[:messages]. It handles three
|
|
16
19
|
# cases:
|
|
17
20
|
#
|
|
18
|
-
# 1.
|
|
19
|
-
# 2.
|
|
21
|
+
# 1. pending_functions is non-empty and the assistant message exists → no-op
|
|
22
|
+
# 2. pending_functions is non-empty but the assistant message is missing
|
|
20
23
|
# (or has different IDs) → inject synthetic message
|
|
21
|
-
# 3.
|
|
24
|
+
# 3. pending_functions is empty (nil-choice bug) but the stream recorded
|
|
22
25
|
# tool calls → inject synthetic message using stream metadata
|
|
23
26
|
#
|
|
24
27
|
class ToolUseGuard
|
|
@@ -29,32 +32,30 @@ module Brute
|
|
|
29
32
|
def call(env)
|
|
30
33
|
response = @app.call(env)
|
|
31
34
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
# stream's recorded metadata (fallback for nil-choice bug).
|
|
36
|
-
tool_data = collect_tool_data(ctx, env)
|
|
35
|
+
# Collect pending tool data from env[:pending_functions] (primary)
|
|
36
|
+
# or the stream's recorded metadata (fallback for nil-choice bug).
|
|
37
|
+
tool_data = collect_tool_data(env)
|
|
37
38
|
return response if tool_data.empty?
|
|
38
39
|
|
|
39
40
|
# Find all tool_use IDs already covered by assistant messages.
|
|
40
|
-
covered_ids = covered_tool_ids(
|
|
41
|
+
covered_ids = covered_tool_ids(env[:messages])
|
|
41
42
|
|
|
42
43
|
# Inject a synthetic assistant message for any uncovered tool calls.
|
|
43
44
|
uncovered = tool_data.reject { |td| covered_ids.include?(td[:id]) }
|
|
44
|
-
inject_synthetic!(
|
|
45
|
+
inject_synthetic!(env[:messages], uncovered) unless uncovered.empty?
|
|
45
46
|
|
|
46
47
|
response
|
|
47
48
|
end
|
|
48
49
|
|
|
49
50
|
private
|
|
50
51
|
|
|
51
|
-
def collect_tool_data(
|
|
52
|
-
functions =
|
|
52
|
+
def collect_tool_data(env)
|
|
53
|
+
functions = env[:pending_functions]
|
|
53
54
|
if functions && !functions.empty?
|
|
54
55
|
functions.map { |fn| { id: fn.id, name: fn.name, arguments: fn.arguments } }
|
|
55
56
|
elsif env[:streaming]
|
|
56
|
-
stream =
|
|
57
|
-
if stream
|
|
57
|
+
stream = env[:stream]
|
|
58
|
+
if stream&.respond_to?(:pending_tool_calls)
|
|
58
59
|
data = stream.pending_tool_calls.dup
|
|
59
60
|
stream.clear_pending_tool_calls!
|
|
60
61
|
data
|
|
@@ -66,19 +67,14 @@ module Brute
|
|
|
66
67
|
end
|
|
67
68
|
end
|
|
68
69
|
|
|
69
|
-
def
|
|
70
|
-
|
|
71
|
-
stream if stream.respond_to?(:pending_tool_calls)
|
|
72
|
-
end
|
|
73
|
-
|
|
74
|
-
def covered_tool_ids(ctx)
|
|
75
|
-
ctx.messages.to_a
|
|
70
|
+
def covered_tool_ids(messages)
|
|
71
|
+
messages
|
|
76
72
|
.select { |m| m.role.to_s == "assistant" && m.tool_call? }
|
|
77
73
|
.flat_map { |m| (m.extra.original_tool_calls || []).map { |tc| tc["id"] } }
|
|
78
74
|
.to_set
|
|
79
75
|
end
|
|
80
76
|
|
|
81
|
-
def inject_synthetic!(
|
|
77
|
+
def inject_synthetic!(messages, uncovered)
|
|
82
78
|
tool_calls = uncovered.map do |td|
|
|
83
79
|
LLM::Object.from(id: td[:id], name: td[:name], arguments: td[:arguments])
|
|
84
80
|
end
|
|
@@ -90,8 +86,48 @@ module Brute
|
|
|
90
86
|
tool_calls: tool_calls,
|
|
91
87
|
original_tool_calls: original_tool_calls,
|
|
92
88
|
})
|
|
93
|
-
|
|
89
|
+
messages << synthetic
|
|
94
90
|
end
|
|
95
91
|
end
|
|
96
92
|
end
|
|
97
93
|
end
|
|
94
|
+
|
|
95
|
+
test do
|
|
96
|
+
require_relative "../../../spec/support/mock_provider"
|
|
97
|
+
require_relative "../../../spec/support/mock_response"
|
|
98
|
+
|
|
99
|
+
def build_env(**overrides)
|
|
100
|
+
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
101
|
+
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
102
|
+
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
it "passes the response through when there are no pending functions" do
|
|
106
|
+
response = MockResponse.new(content: "no tools")
|
|
107
|
+
inner_app = ->(_env) { response }
|
|
108
|
+
middleware = Brute::Middleware::ToolUseGuard.new(inner_app)
|
|
109
|
+
result = middleware.call(build_env(pending_functions: []))
|
|
110
|
+
result.should == response
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
it "injects a synthetic assistant message when tool calls exist but assistant is missing" do
|
|
114
|
+
fn = Struct.new(:id, :name, :arguments, keyword_init: true)
|
|
115
|
+
.new(id: "toolu_1", name: "fs_read", arguments: { "path" => "test.rb" })
|
|
116
|
+
response = MockResponse.new(content: "")
|
|
117
|
+
inner_app = ->(_env) { response }
|
|
118
|
+
middleware = Brute::Middleware::ToolUseGuard.new(inner_app)
|
|
119
|
+
env = build_env(messages: [], pending_functions: [fn])
|
|
120
|
+
lambda { middleware.call(env) }.should.not.raise
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
it "creates one assistant message for uncovered tool calls" do
|
|
124
|
+
fn = Struct.new(:id, :name, :arguments, keyword_init: true)
|
|
125
|
+
.new(id: "toolu_1", name: "fs_read", arguments: { "path" => "test.rb" })
|
|
126
|
+
response = MockResponse.new(content: "")
|
|
127
|
+
inner_app = ->(_env) { response }
|
|
128
|
+
middleware = Brute::Middleware::ToolUseGuard.new(inner_app)
|
|
129
|
+
env = build_env(messages: [], pending_functions: [fn])
|
|
130
|
+
middleware.call(env)
|
|
131
|
+
env[:messages].select { |m| m.role.to_s == "assistant" }.size.should == 1
|
|
132
|
+
end
|
|
133
|
+
end
|