brute 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/brute/agent_stream.rb +126 -2
- data/lib/brute/diff.rb +34 -0
- data/lib/brute/message_store.rb +194 -0
- data/lib/brute/middleware/compaction_check.rb +133 -0
- data/lib/brute/middleware/doom_loop_detection.rb +100 -0
- data/lib/brute/middleware/llm_call.rb +89 -0
- data/lib/brute/middleware/message_tracking.rb +177 -0
- data/lib/brute/middleware/otel/span.rb +111 -0
- data/lib/brute/middleware/otel/token_usage.rb +93 -0
- data/lib/brute/middleware/otel/tool_calls.rb +113 -0
- data/lib/brute/middleware/otel/tool_results.rb +92 -0
- data/lib/brute/middleware/otel.rb +5 -0
- data/lib/brute/middleware/reasoning_normalizer.rb +119 -0
- data/lib/brute/middleware/retry.rb +93 -0
- data/lib/brute/middleware/session_persistence.rb +42 -0
- data/lib/brute/middleware/token_tracking.rb +77 -0
- data/lib/brute/middleware/tool_error_tracking.rb +101 -0
- data/lib/brute/middleware/tool_use_guard.rb +70 -1
- data/lib/brute/middleware/tracing.rb +71 -0
- data/lib/brute/orchestrator.rb +169 -3
- data/lib/brute/patches/buffer_nil_guard.rb +5 -0
- data/lib/brute/pipeline.rb +135 -0
- data/lib/brute/prompts/build_switch.rb +33 -0
- data/lib/brute/prompts/environment.rb +47 -0
- data/lib/brute/prompts/identity.rb +36 -0
- data/lib/brute/prompts/instructions.rb +24 -0
- data/lib/brute/prompts/max_steps.rb +32 -0
- data/lib/brute/prompts/plan_reminder.rb +33 -0
- data/lib/brute/prompts/skills.rb +35 -0
- data/lib/brute/providers/opencode_go.rb +5 -0
- data/lib/brute/providers/opencode_zen.rb +7 -2
- data/lib/brute/providers/shell_response.rb +5 -0
- data/lib/brute/system_prompt.rb +214 -0
- data/lib/brute/tools/delegate.rb +129 -0
- data/lib/brute/tools/fs_patch.rb +53 -0
- data/lib/brute/tools/fs_read.rb +5 -0
- data/lib/brute/tools/fs_remove.rb +5 -0
- data/lib/brute/tools/fs_search.rb +5 -0
- data/lib/brute/tools/fs_undo.rb +5 -0
- data/lib/brute/tools/fs_write.rb +50 -0
- data/lib/brute/tools/net_fetch.rb +5 -0
- data/lib/brute/tools/question.rb +5 -0
- data/lib/brute/tools/shell.rb +5 -0
- data/lib/brute/tools/todo_read.rb +5 -0
- data/lib/brute/tools/todo_write.rb +5 -0
- data/lib/brute/version.rb +1 -1
- data/lib/brute.rb +8 -8
- metadata +2 -2
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
if __FILE__ == $0
|
|
4
|
+
require "bundler/setup"
|
|
5
|
+
require "brute"
|
|
6
|
+
end
|
|
7
|
+
|
|
3
8
|
module Brute
|
|
4
9
|
module Middleware
|
|
5
10
|
module OTel
|
|
@@ -35,3 +40,90 @@ module Brute
|
|
|
35
40
|
end
|
|
36
41
|
end
|
|
37
42
|
end
|
|
43
|
+
|
|
44
|
+
if __FILE__ == $0
|
|
45
|
+
require_relative "../../../../spec/spec_helper"
|
|
46
|
+
|
|
47
|
+
RSpec.describe Brute::Middleware::OTel::ToolResults do
|
|
48
|
+
let(:response) { MockResponse.new(content: "processed") }
|
|
49
|
+
let(:inner_app) { ->(_env) { response } }
|
|
50
|
+
let(:middleware) { described_class.new(inner_app) }
|
|
51
|
+
|
|
52
|
+
it "passes the response through unchanged" do
|
|
53
|
+
env = build_env
|
|
54
|
+
result = middleware.call(env)
|
|
55
|
+
expect(result).to eq(response)
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
context "when env[:span] is nil" do
|
|
59
|
+
it "passes through without error" do
|
|
60
|
+
results = [["fs_read", { content: "data" }]]
|
|
61
|
+
env = build_env(tool_results: results)
|
|
62
|
+
|
|
63
|
+
result = middleware.call(env)
|
|
64
|
+
expect(result).to eq(response)
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
context "when env[:span] is present" do
|
|
69
|
+
let(:span) { mock_span }
|
|
70
|
+
|
|
71
|
+
it "does nothing when tool_results is nil" do
|
|
72
|
+
env = build_env(span: span, tool_results: nil)
|
|
73
|
+
middleware.call(env)
|
|
74
|
+
|
|
75
|
+
expect(span).not_to have_received(:add_event)
|
|
76
|
+
expect(span).not_to have_received(:set_attribute)
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
it "records a tool_result event per result" do
|
|
80
|
+
results = [
|
|
81
|
+
["fs_read", { content: "file data" }],
|
|
82
|
+
["shell", { output: "ok" }],
|
|
83
|
+
]
|
|
84
|
+
env = build_env(span: span, tool_results: results)
|
|
85
|
+
middleware.call(env)
|
|
86
|
+
|
|
87
|
+
expect(span).to have_received(:set_attribute).with("brute.tool_results.count", 2)
|
|
88
|
+
expect(span).to have_received(:add_event).with(
|
|
89
|
+
"tool_result",
|
|
90
|
+
attributes: hash_including("tool.name" => "fs_read", "tool.status" => "ok")
|
|
91
|
+
)
|
|
92
|
+
expect(span).to have_received(:add_event).with(
|
|
93
|
+
"tool_result",
|
|
94
|
+
attributes: hash_including("tool.name" => "shell", "tool.status" => "ok")
|
|
95
|
+
)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
it "records error status and message for failed tool results" do
|
|
99
|
+
results = [
|
|
100
|
+
["fs_read", { error: "not found" }],
|
|
101
|
+
]
|
|
102
|
+
env = build_env(span: span, tool_results: results)
|
|
103
|
+
middleware.call(env)
|
|
104
|
+
|
|
105
|
+
expect(span).to have_received(:add_event).with(
|
|
106
|
+
"tool_result",
|
|
107
|
+
attributes: hash_including(
|
|
108
|
+
"tool.name" => "fs_read",
|
|
109
|
+
"tool.status" => "error",
|
|
110
|
+
"tool.error" => "not found"
|
|
111
|
+
)
|
|
112
|
+
)
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
it "handles a mix of successful and failed results" do
|
|
116
|
+
results = [
|
|
117
|
+
["fs_read", { content: "ok" }],
|
|
118
|
+
["shell", { error: "exit code 1" }],
|
|
119
|
+
["fs_write", { success: true }],
|
|
120
|
+
]
|
|
121
|
+
env = build_env(span: span, tool_results: results)
|
|
122
|
+
middleware.call(env)
|
|
123
|
+
|
|
124
|
+
expect(span).to have_received(:set_attribute).with("brute.tool_results.count", 3)
|
|
125
|
+
expect(span).to have_received(:add_event).exactly(3).times
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
end
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
if __FILE__ == $0
|
|
4
|
+
require "bundler/setup"
|
|
5
|
+
require "brute"
|
|
6
|
+
end
|
|
7
|
+
|
|
3
8
|
module Brute
|
|
4
9
|
module Middleware
|
|
5
10
|
# Handles reasoning/thinking content across model switches.
|
|
@@ -96,3 +101,117 @@ module Brute
|
|
|
96
101
|
end
|
|
97
102
|
end
|
|
98
103
|
end
|
|
104
|
+
|
|
105
|
+
if __FILE__ == $0
|
|
106
|
+
require_relative "../../../spec/spec_helper"
|
|
107
|
+
|
|
108
|
+
RSpec.describe Brute::Middleware::ReasoningNormalizer do
|
|
109
|
+
let(:response) { MockResponse.new(content: "reasoned response") }
|
|
110
|
+
let(:inner_app) { ->(_env) { response } }
|
|
111
|
+
|
|
112
|
+
# Build a provider whose class name contains the given string.
|
|
113
|
+
def make_provider(type_name)
|
|
114
|
+
klass = Class.new do
|
|
115
|
+
define_method(:name) { :mock }
|
|
116
|
+
define_method(:default_model) { "mock-model" }
|
|
117
|
+
define_method(:user_role) { :user }
|
|
118
|
+
define_method(:system_role) { :system }
|
|
119
|
+
define_method(:assistant_role) { :assistant }
|
|
120
|
+
define_method(:tool_role) { :tool }
|
|
121
|
+
define_method(:tracer) { nil }
|
|
122
|
+
define_method(:tracer=) { |*| }
|
|
123
|
+
define_method(:complete) { |*_args, **_kw| MockResponse.new(content: "ok") }
|
|
124
|
+
end
|
|
125
|
+
# Override class name to trigger provider_type detection
|
|
126
|
+
klass.define_method(:class) do
|
|
127
|
+
c = super()
|
|
128
|
+
name_str = "LLM::#{type_name}"
|
|
129
|
+
c.define_singleton_method(:name) { name_str }
|
|
130
|
+
c
|
|
131
|
+
end
|
|
132
|
+
klass.new
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
context "with Anthropic provider and budget_tokens" do
|
|
136
|
+
it "injects thinking param into env[:params]" do
|
|
137
|
+
provider = make_provider("Anthropic")
|
|
138
|
+
middleware = described_class.new(inner_app, model_id: "claude-4", budget_tokens: 8000, enabled: true)
|
|
139
|
+
env = build_env(provider: provider, params: {})
|
|
140
|
+
|
|
141
|
+
middleware.call(env)
|
|
142
|
+
|
|
143
|
+
expect(env[:params][:thinking]).to eq({ type: "enabled", budget_tokens: 8000 })
|
|
144
|
+
end
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
context "with Anthropic provider without budget_tokens" do
|
|
148
|
+
it "does not inject thinking param" do
|
|
149
|
+
provider = make_provider("Anthropic")
|
|
150
|
+
middleware = described_class.new(inner_app, model_id: "claude-4", enabled: true)
|
|
151
|
+
env = build_env(provider: provider, params: {})
|
|
152
|
+
|
|
153
|
+
middleware.call(env)
|
|
154
|
+
|
|
155
|
+
expect(env[:params][:thinking]).to be_nil
|
|
156
|
+
end
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
context "with OpenAI provider" do
|
|
160
|
+
it "injects reasoning_effort based on effort level" do
|
|
161
|
+
provider = make_provider("OpenAI")
|
|
162
|
+
middleware = described_class.new(inner_app, model_id: "o3", effort: :high, enabled: true)
|
|
163
|
+
env = build_env(provider: provider, params: {})
|
|
164
|
+
|
|
165
|
+
middleware.call(env)
|
|
166
|
+
|
|
167
|
+
expect(env[:params][:reasoning_effort]).to eq("high")
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
it "maps effort levels correctly" do
|
|
171
|
+
provider = make_provider("OpenAI")
|
|
172
|
+
|
|
173
|
+
{ low: "low", medium: "medium", high: "high", minimal: "low", max: "high" }.each do |effort, expected|
|
|
174
|
+
middleware = described_class.new(inner_app, model_id: "o3", effort: effort, enabled: true)
|
|
175
|
+
env = build_env(provider: provider, params: {})
|
|
176
|
+
middleware.call(env)
|
|
177
|
+
expect(env[:params][:reasoning_effort]).to eq(expected), "Expected effort #{effort} to map to #{expected}"
|
|
178
|
+
end
|
|
179
|
+
end
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
context "with unknown provider" do
|
|
183
|
+
it "does not inject any reasoning params" do
|
|
184
|
+
provider = make_provider("Mistral")
|
|
185
|
+
middleware = described_class.new(inner_app, model_id: "mistral-large", enabled: true)
|
|
186
|
+
env = build_env(provider: provider, params: {})
|
|
187
|
+
|
|
188
|
+
middleware.call(env)
|
|
189
|
+
|
|
190
|
+
expect(env[:params]).to eq({})
|
|
191
|
+
end
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
context "when disabled" do
|
|
195
|
+
it "does not inject reasoning params" do
|
|
196
|
+
provider = make_provider("Anthropic")
|
|
197
|
+
middleware = described_class.new(inner_app, model_id: "claude-4", budget_tokens: 8000, enabled: false)
|
|
198
|
+
env = build_env(provider: provider, params: {})
|
|
199
|
+
|
|
200
|
+
middleware.call(env)
|
|
201
|
+
|
|
202
|
+
expect(env[:params]).to eq({})
|
|
203
|
+
end
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
it "allows model_id to be updated mid-session" do
|
|
207
|
+
middleware = described_class.new(inner_app, model_id: "old-model", enabled: true)
|
|
208
|
+
middleware.model_id = "new-model"
|
|
209
|
+
|
|
210
|
+
provider = make_provider("OpenAI")
|
|
211
|
+
env = build_env(provider: provider, params: {})
|
|
212
|
+
middleware.call(env)
|
|
213
|
+
|
|
214
|
+
expect(env[:params][:reasoning_effort]).not_to be_nil
|
|
215
|
+
end
|
|
216
|
+
end
|
|
217
|
+
end
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
if __FILE__ == $0
|
|
4
|
+
require "bundler/setup"
|
|
5
|
+
require "brute"
|
|
6
|
+
end
|
|
7
|
+
|
|
3
8
|
module Brute
|
|
4
9
|
module Middleware
|
|
5
10
|
# Retries the inner call on transient LLM errors with exponential backoff.
|
|
@@ -43,3 +48,91 @@ module Brute
|
|
|
43
48
|
end
|
|
44
49
|
end
|
|
45
50
|
end
|
|
51
|
+
|
|
52
|
+
if __FILE__ == $0
|
|
53
|
+
require_relative "../../../spec/spec_helper"
|
|
54
|
+
|
|
55
|
+
RSpec.describe Brute::Middleware::Retry do
|
|
56
|
+
let(:response) { MockResponse.new(content: "success") }
|
|
57
|
+
|
|
58
|
+
it "returns the response on first successful call" do
|
|
59
|
+
app, calls = mock_inner_app(response: response)
|
|
60
|
+
middleware = described_class.new(app)
|
|
61
|
+
env = build_env
|
|
62
|
+
|
|
63
|
+
result = middleware.call(env)
|
|
64
|
+
|
|
65
|
+
expect(result).to eq(response)
|
|
66
|
+
expect(calls.size).to eq(1)
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
it "retries on LLM::RateLimitError and succeeds" do
|
|
70
|
+
app = flaky_inner_app(LLM::RateLimitError, fail_count: 2, response: response)
|
|
71
|
+
middleware = described_class.new(app, max_attempts: 3, base_delay: 2)
|
|
72
|
+
allow(middleware).to receive(:sleep)
|
|
73
|
+
env = build_env
|
|
74
|
+
|
|
75
|
+
result = middleware.call(env)
|
|
76
|
+
|
|
77
|
+
expect(result).to eq(response)
|
|
78
|
+
expect(env[:metadata][:retry_attempt]).to eq(2)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
it "retries on LLM::ServerError and succeeds" do
|
|
82
|
+
app = flaky_inner_app(LLM::ServerError, fail_count: 1, response: response)
|
|
83
|
+
middleware = described_class.new(app, max_attempts: 3, base_delay: 2)
|
|
84
|
+
allow(middleware).to receive(:sleep)
|
|
85
|
+
env = build_env
|
|
86
|
+
|
|
87
|
+
result = middleware.call(env)
|
|
88
|
+
|
|
89
|
+
expect(result).to eq(response)
|
|
90
|
+
expect(env[:metadata][:retry_attempt]).to eq(1)
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
it "re-raises after exhausting all attempts" do
|
|
94
|
+
app = failing_inner_app(LLM::RateLimitError, message: "rate limited")
|
|
95
|
+
middleware = described_class.new(app, max_attempts: 3, base_delay: 2)
|
|
96
|
+
allow(middleware).to receive(:sleep)
|
|
97
|
+
env = build_env
|
|
98
|
+
|
|
99
|
+
expect { middleware.call(env) }.to raise_error(LLM::RateLimitError, "rate limited")
|
|
100
|
+
expect(env[:metadata][:last_error]).to eq("rate limited")
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
it "does not retry non-retryable errors" do
|
|
104
|
+
call_count = 0
|
|
105
|
+
app = ->(_env) { call_count += 1; raise ArgumentError, "bad input" }
|
|
106
|
+
middleware = described_class.new(app)
|
|
107
|
+
env = build_env
|
|
108
|
+
|
|
109
|
+
expect { middleware.call(env) }.to raise_error(ArgumentError)
|
|
110
|
+
expect(call_count).to eq(1)
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
it "sleeps with exponential backoff delays" do
|
|
114
|
+
app = flaky_inner_app(LLM::RateLimitError, fail_count: 2, response: response)
|
|
115
|
+
middleware = described_class.new(app, max_attempts: 3, base_delay: 2)
|
|
116
|
+
allow(middleware).to receive(:sleep)
|
|
117
|
+
env = build_env
|
|
118
|
+
|
|
119
|
+
middleware.call(env)
|
|
120
|
+
|
|
121
|
+
# base_delay ** attempts: 2**1 = 2, 2**2 = 4
|
|
122
|
+
expect(middleware).to have_received(:sleep).with(2).ordered
|
|
123
|
+
expect(middleware).to have_received(:sleep).with(4).ordered
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
it "records retry_delay in metadata" do
|
|
127
|
+
app = flaky_inner_app(LLM::RateLimitError, fail_count: 1, response: response)
|
|
128
|
+
middleware = described_class.new(app, max_attempts: 3, base_delay: 3)
|
|
129
|
+
allow(middleware).to receive(:sleep)
|
|
130
|
+
env = build_env
|
|
131
|
+
|
|
132
|
+
middleware.call(env)
|
|
133
|
+
|
|
134
|
+
# base_delay ** attempts: 3**1 = 3
|
|
135
|
+
expect(env[:metadata][:retry_delay]).to eq(3)
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
end
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
if __FILE__ == $0
|
|
4
|
+
require "bundler/setup"
|
|
5
|
+
require "brute"
|
|
6
|
+
end
|
|
7
|
+
|
|
3
8
|
module Brute
|
|
4
9
|
module Middleware
|
|
5
10
|
# Saves the conversation to disk after each LLM call.
|
|
@@ -27,3 +32,40 @@ module Brute
|
|
|
27
32
|
end
|
|
28
33
|
end
|
|
29
34
|
end
|
|
35
|
+
|
|
36
|
+
if __FILE__ == $0
|
|
37
|
+
require_relative "../../../spec/spec_helper"
|
|
38
|
+
|
|
39
|
+
RSpec.describe Brute::Middleware::SessionPersistence do
|
|
40
|
+
let(:response) { MockResponse.new(content: "saved response") }
|
|
41
|
+
let(:inner_app) { ->(_env) { response } }
|
|
42
|
+
let(:session) { double("session", save: nil) }
|
|
43
|
+
let(:middleware) { described_class.new(inner_app, session: session) }
|
|
44
|
+
|
|
45
|
+
it "passes the response through unchanged" do
|
|
46
|
+
env = build_env
|
|
47
|
+
result = middleware.call(env)
|
|
48
|
+
expect(result).to eq(response)
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
it "calls session.save with the context after a successful LLM call" do
|
|
52
|
+
env = build_env
|
|
53
|
+
middleware.call(env)
|
|
54
|
+
expect(session).to have_received(:save).with(env[:context])
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
it "does not propagate session save failures" do
|
|
58
|
+
allow(session).to receive(:save).and_raise(RuntimeError, "disk full")
|
|
59
|
+
env = build_env
|
|
60
|
+
|
|
61
|
+
expect { middleware.call(env) }.not_to raise_error
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
it "prints a warning to stderr on save failure" do
|
|
65
|
+
allow(session).to receive(:save).and_raise(RuntimeError, "disk full")
|
|
66
|
+
env = build_env
|
|
67
|
+
|
|
68
|
+
expect { middleware.call(env) }.to output(/Session save failed: disk full/).to_stderr
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
if __FILE__ == $0
|
|
4
|
+
require "bundler/setup"
|
|
5
|
+
require "brute"
|
|
6
|
+
end
|
|
7
|
+
|
|
3
8
|
module Brute
|
|
4
9
|
module Middleware
|
|
5
10
|
# Tracks cumulative token usage across all LLM calls in a session.
|
|
@@ -44,3 +49,75 @@ module Brute
|
|
|
44
49
|
end
|
|
45
50
|
end
|
|
46
51
|
end
|
|
52
|
+
|
|
53
|
+
if __FILE__ == $0
|
|
54
|
+
require_relative "../../../spec/spec_helper"
|
|
55
|
+
|
|
56
|
+
RSpec.describe Brute::Middleware::TokenTracking do
|
|
57
|
+
let(:response) do
|
|
58
|
+
MockResponse.new(
|
|
59
|
+
content: "hello",
|
|
60
|
+
usage: LLM::Usage.new(input_tokens: 100, output_tokens: 50, reasoning_tokens: 10, total_tokens: 160)
|
|
61
|
+
)
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
let(:inner_app) { ->(_env) { response } }
|
|
65
|
+
let(:middleware) { described_class.new(inner_app) }
|
|
66
|
+
|
|
67
|
+
it "passes the response through unchanged" do
|
|
68
|
+
env = build_env
|
|
69
|
+
result = middleware.call(env)
|
|
70
|
+
expect(result).to eq(response)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
it "populates env[:metadata][:tokens] with correct values" do
|
|
74
|
+
env = build_env
|
|
75
|
+
middleware.call(env)
|
|
76
|
+
|
|
77
|
+
tokens = env[:metadata][:tokens]
|
|
78
|
+
expect(tokens[:total_input]).to eq(100)
|
|
79
|
+
expect(tokens[:total_output]).to eq(50)
|
|
80
|
+
expect(tokens[:total_reasoning]).to eq(10)
|
|
81
|
+
expect(tokens[:total]).to eq(150) # input + output
|
|
82
|
+
expect(tokens[:call_count]).to eq(1)
|
|
83
|
+
expect(tokens[:last_call]).to eq(input: 100, output: 50, total: 160)
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
it "accumulates token counts across multiple calls" do
|
|
87
|
+
env = build_env
|
|
88
|
+
middleware.call(env)
|
|
89
|
+
middleware.call(env)
|
|
90
|
+
|
|
91
|
+
tokens = env[:metadata][:tokens]
|
|
92
|
+
expect(tokens[:total_input]).to eq(200)
|
|
93
|
+
expect(tokens[:total_output]).to eq(100)
|
|
94
|
+
expect(tokens[:total_reasoning]).to eq(20)
|
|
95
|
+
expect(tokens[:call_count]).to eq(2)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
it "handles a response without usage gracefully" do
|
|
99
|
+
no_usage_response = double("response")
|
|
100
|
+
allow(no_usage_response).to receive(:respond_to?).with(:usage).and_return(false)
|
|
101
|
+
app = ->(_env) { no_usage_response }
|
|
102
|
+
mw = described_class.new(app)
|
|
103
|
+
|
|
104
|
+
env = build_env
|
|
105
|
+
result = mw.call(env)
|
|
106
|
+
|
|
107
|
+
expect(result).to eq(no_usage_response)
|
|
108
|
+
expect(env[:metadata][:tokens]).to be_nil
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
it "handles a response where usage returns nil" do
|
|
112
|
+
nil_usage_response = double("response", usage: nil)
|
|
113
|
+
allow(nil_usage_response).to receive(:respond_to?).with(:usage).and_return(true)
|
|
114
|
+
app = ->(_env) { nil_usage_response }
|
|
115
|
+
mw = described_class.new(app)
|
|
116
|
+
|
|
117
|
+
env = build_env
|
|
118
|
+
mw.call(env)
|
|
119
|
+
|
|
120
|
+
expect(env[:metadata][:tokens]).to be_nil
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
end
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
if __FILE__ == $0
|
|
4
|
+
require "bundler/setup"
|
|
5
|
+
require "brute"
|
|
6
|
+
end
|
|
7
|
+
|
|
3
8
|
module Brute
|
|
4
9
|
module Middleware
|
|
5
10
|
# Tracks per-tool error counts and total tool call count across LLM
|
|
@@ -50,3 +55,99 @@ module Brute
|
|
|
50
55
|
end
|
|
51
56
|
end
|
|
52
57
|
end
|
|
58
|
+
|
|
59
|
+
if __FILE__ == $0
|
|
60
|
+
require_relative "../../../spec/spec_helper"
|
|
61
|
+
|
|
62
|
+
RSpec.describe Brute::Middleware::ToolErrorTracking do
|
|
63
|
+
let(:response) { MockResponse.new(content: "tracked") }
|
|
64
|
+
let(:inner_app) { ->(_env) { response } }
|
|
65
|
+
let(:middleware) { described_class.new(inner_app, max_failures: 3) }
|
|
66
|
+
|
|
67
|
+
it "passes the response through" do
|
|
68
|
+
env = build_env
|
|
69
|
+
result = middleware.call(env)
|
|
70
|
+
expect(result).to eq(response)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
it "reports zero tool calls when tool_results is nil" do
|
|
74
|
+
env = build_env(tool_results: nil)
|
|
75
|
+
middleware.call(env)
|
|
76
|
+
|
|
77
|
+
expect(env[:metadata][:tool_calls]).to eq(0)
|
|
78
|
+
expect(env[:metadata][:tool_errors]).to eq({})
|
|
79
|
+
expect(env[:metadata][:tool_error_limit_reached]).to be false
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
it "counts total tool calls from tool_results" do
|
|
83
|
+
results = [
|
|
84
|
+
["fs_read", { content: "data" }],
|
|
85
|
+
["shell", { output: "ok" }],
|
|
86
|
+
["fs_write", { success: true }],
|
|
87
|
+
]
|
|
88
|
+
env = build_env(tool_results: results)
|
|
89
|
+
middleware.call(env)
|
|
90
|
+
|
|
91
|
+
expect(env[:metadata][:tool_calls]).to eq(3)
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
it "counts per-tool errors from results with error key" do
|
|
95
|
+
results = [
|
|
96
|
+
["fs_read", { error: "not found" }],
|
|
97
|
+
["fs_read", { error: "permission denied" }],
|
|
98
|
+
["shell", { output: "ok" }],
|
|
99
|
+
]
|
|
100
|
+
env = build_env(tool_results: results)
|
|
101
|
+
middleware.call(env)
|
|
102
|
+
|
|
103
|
+
expect(env[:metadata][:tool_errors]).to eq({ "fs_read" => 2 })
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
it "sets tool_error_limit_reached when a tool hits max_failures" do
|
|
107
|
+
results = [
|
|
108
|
+
["fs_read", { error: "fail 1" }],
|
|
109
|
+
["fs_read", { error: "fail 2" }],
|
|
110
|
+
["fs_read", { error: "fail 3" }],
|
|
111
|
+
]
|
|
112
|
+
env = build_env(tool_results: results)
|
|
113
|
+
middleware.call(env)
|
|
114
|
+
|
|
115
|
+
expect(env[:metadata][:tool_error_limit_reached]).to be true
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
it "does not flag below the threshold" do
|
|
119
|
+
results = [
|
|
120
|
+
["fs_read", { error: "fail 1" }],
|
|
121
|
+
["fs_read", { error: "fail 2" }],
|
|
122
|
+
]
|
|
123
|
+
env = build_env(tool_results: results)
|
|
124
|
+
middleware.call(env)
|
|
125
|
+
|
|
126
|
+
expect(env[:metadata][:tool_error_limit_reached]).to be false
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
it "accumulates counts across multiple calls" do
|
|
130
|
+
env1 = build_env(tool_results: [["fs_read", { error: "fail" }]])
|
|
131
|
+
middleware.call(env1)
|
|
132
|
+
|
|
133
|
+
env2 = build_env(tool_results: [["fs_read", { error: "fail again" }], ["shell", { output: "ok" }]])
|
|
134
|
+
middleware.call(env2)
|
|
135
|
+
|
|
136
|
+
expect(env2[:metadata][:tool_calls]).to eq(3) # 1 + 2
|
|
137
|
+
expect(env2[:metadata][:tool_errors]).to eq({ "fs_read" => 2 })
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
it "clears counters on reset!" do
|
|
141
|
+
env = build_env(tool_results: [["fs_read", { error: "fail" }]])
|
|
142
|
+
middleware.call(env)
|
|
143
|
+
|
|
144
|
+
middleware.reset!
|
|
145
|
+
|
|
146
|
+
env2 = build_env(tool_results: nil)
|
|
147
|
+
middleware.call(env2)
|
|
148
|
+
|
|
149
|
+
expect(env2[:metadata][:tool_calls]).to eq(0)
|
|
150
|
+
expect(env2[:metadata][:tool_errors]).to eq({})
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
end
|
|
@@ -56,7 +56,7 @@ module Brute
|
|
|
56
56
|
stream = resolve_stream(ctx)
|
|
57
57
|
if stream
|
|
58
58
|
data = stream.pending_tool_calls.dup
|
|
59
|
-
stream.
|
|
59
|
+
stream.clear_pending_tool_calls!
|
|
60
60
|
data
|
|
61
61
|
else
|
|
62
62
|
[]
|
|
@@ -95,3 +95,72 @@ module Brute
|
|
|
95
95
|
end
|
|
96
96
|
end
|
|
97
97
|
end
|
|
98
|
+
|
|
99
|
+
if __FILE__ == $0
|
|
100
|
+
require_relative "../../../spec/spec_helper"
|
|
101
|
+
|
|
102
|
+
RSpec.describe Brute::Middleware::ToolUseGuard do
|
|
103
|
+
let(:provider) { MockProvider.new }
|
|
104
|
+
|
|
105
|
+
# Helper: build a response that produces pending tool calls (functions) in the context.
|
|
106
|
+
def make_tool_response(tool_calls:)
|
|
107
|
+
MockResponse.new(content: "", tool_calls: tool_calls)
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
it "passes the response through when there are no pending functions" do
|
|
111
|
+
response = MockResponse.new(content: "no tools")
|
|
112
|
+
allow(provider).to receive(:complete).and_return(response)
|
|
113
|
+
|
|
114
|
+
ctx = LLM::Context.new(provider, tools: [])
|
|
115
|
+
prompt = ctx.prompt { |p| p.system("sys"); p.user("hi") }
|
|
116
|
+
|
|
117
|
+
inner_app = ->(_env) { ctx.talk(prompt); response }
|
|
118
|
+
middleware = described_class.new(inner_app)
|
|
119
|
+
env = build_env(context: ctx, provider: provider)
|
|
120
|
+
|
|
121
|
+
result = middleware.call(env)
|
|
122
|
+
expect(result).to eq(response)
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
it "does not inject a synthetic message when the assistant message already has tool_call?" do
|
|
126
|
+
tool_calls = [{ id: "toolu_1", name: "fs_read", arguments: { "path" => "test.rb" } }]
|
|
127
|
+
response = make_tool_response(tool_calls: tool_calls)
|
|
128
|
+
allow(provider).to receive(:complete).and_return(response)
|
|
129
|
+
|
|
130
|
+
ctx = LLM::Context.new(provider, tools: [])
|
|
131
|
+
prompt = ctx.prompt { |p| p.system("sys"); p.user("read it") }
|
|
132
|
+
|
|
133
|
+
inner_app = ->(_env) { ctx.talk(prompt); response }
|
|
134
|
+
middleware = described_class.new(inner_app)
|
|
135
|
+
env = build_env(context: ctx, provider: provider)
|
|
136
|
+
|
|
137
|
+
middleware.call(env)
|
|
138
|
+
|
|
139
|
+
messages = ctx.messages.to_a
|
|
140
|
+
assistant_msgs = messages.select { |m| m.role.to_s == "assistant" }
|
|
141
|
+
# Should only have the original assistant message, no synthetic
|
|
142
|
+
expect(assistant_msgs.size).to eq(1)
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
it "injects a synthetic assistant message when tool calls exist but assistant is missing" do
|
|
146
|
+
tool_calls = [{ id: "toolu_1", name: "fs_read", arguments: { "path" => "test.rb" } }]
|
|
147
|
+
response = MockResponse.new(content: "")
|
|
148
|
+
# Simulate the bug: choices[-1] is nil, so no assistant message stored
|
|
149
|
+
allow(response).to receive(:choices).and_return([nil])
|
|
150
|
+
allow(provider).to receive(:complete).and_return(response)
|
|
151
|
+
|
|
152
|
+
ctx = LLM::Context.new(provider, tools: [])
|
|
153
|
+
prompt = ctx.prompt { |p| p.system("sys"); p.user("read it") }
|
|
154
|
+
|
|
155
|
+
inner_app = ->(_env) do
|
|
156
|
+
ctx.talk(prompt)
|
|
157
|
+
response
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
middleware = described_class.new(inner_app)
|
|
161
|
+
env = build_env(context: ctx, provider: provider)
|
|
162
|
+
|
|
163
|
+
expect { middleware.call(env) }.not_to raise_error
|
|
164
|
+
end
|
|
165
|
+
end
|
|
166
|
+
end
|