brute 0.4.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/brute/agent.rb +14 -0
- data/lib/brute/diff.rb +18 -28
- data/lib/brute/loop/agent_stream.rb +118 -0
- data/lib/brute/loop/agent_turn.rb +520 -0
- data/lib/brute/{compactor.rb → loop/compactor.rb} +2 -0
- data/lib/brute/{doom_loop.rb → loop/doom_loop.rb} +2 -0
- data/lib/brute/loop/step.rb +332 -0
- data/lib/brute/loop/tool_call_step.rb +90 -0
- data/lib/brute/middleware/compaction_check.rb +60 -146
- data/lib/brute/middleware/doom_loop_detection.rb +95 -92
- data/lib/brute/middleware/llm_call.rb +78 -80
- data/lib/brute/middleware/message_tracking.rb +115 -162
- data/lib/brute/middleware/otel/span.rb +25 -106
- data/lib/brute/middleware/otel/token_usage.rb +29 -84
- data/lib/brute/middleware/otel/tool_calls.rb +23 -107
- data/lib/brute/middleware/otel/tool_results.rb +22 -86
- data/lib/brute/middleware/reasoning_normalizer.rb +78 -103
- data/lib/brute/middleware/retry.rb +95 -76
- data/lib/brute/middleware/session_persistence.rb +38 -37
- data/lib/brute/middleware/token_tracking.rb +64 -63
- data/lib/brute/middleware/tool_error_tracking.rb +108 -82
- data/lib/brute/middleware/tool_use_guard.rb +57 -90
- data/lib/brute/middleware/tracing.rb +53 -63
- data/lib/brute/middleware.rb +18 -0
- data/lib/brute/orchestrator/turn.rb +105 -0
- data/lib/brute/pipeline.rb +77 -133
- data/lib/brute/prompts/build_switch.rb +21 -25
- data/lib/brute/prompts/environment.rb +31 -35
- data/lib/brute/prompts/identity.rb +22 -29
- data/lib/brute/prompts/instructions.rb +15 -18
- data/lib/brute/prompts/max_steps.rb +18 -25
- data/lib/brute/prompts/plan_reminder.rb +18 -26
- data/lib/brute/prompts/skills.rb +8 -30
- data/lib/brute/prompts.rb +28 -0
- data/lib/brute/providers/ollama.rb +135 -0
- data/lib/brute/providers/shell.rb +2 -2
- data/lib/brute/providers/shell_response.rb +2 -2
- data/lib/brute/providers.rb +62 -0
- data/lib/brute/queue/base_queue.rb +222 -0
- data/lib/brute/{file_mutation_queue.rb → queue/file_mutation_queue.rb} +28 -26
- data/lib/brute/queue/parallel_queue.rb +66 -0
- data/lib/brute/queue/sequential_queue.rb +63 -0
- data/lib/brute/store/message_store.rb +362 -0
- data/lib/brute/store/session.rb +106 -0
- data/lib/brute/{snapshot_store.rb → store/snapshot_store.rb} +2 -0
- data/lib/brute/{todo_store.rb → store/todo_store.rb} +2 -0
- data/lib/brute/system_prompt.rb +81 -194
- data/lib/brute/tools/delegate.rb +46 -116
- data/lib/brute/tools/fs_patch.rb +36 -37
- data/lib/brute/tools/fs_remove.rb +2 -2
- data/lib/brute/tools/fs_undo.rb +2 -2
- data/lib/brute/tools/fs_write.rb +29 -41
- data/lib/brute/tools/todo_read.rb +1 -1
- data/lib/brute/tools/todo_write.rb +1 -1
- data/lib/brute/tools.rb +31 -0
- data/lib/brute/version.rb +1 -1
- data/lib/brute.rb +40 -204
- metadata +31 -20
- data/lib/brute/agent_stream.rb +0 -181
- data/lib/brute/hooks.rb +0 -84
- data/lib/brute/message_store.rb +0 -463
- data/lib/brute/orchestrator.rb +0 -550
- data/lib/brute/session.rb +0 -161
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
require "brute"
|
|
6
|
-
end
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
7
5
|
|
|
8
6
|
module Brute
|
|
9
7
|
module Middleware
|
|
@@ -15,7 +13,7 @@ module Brute
|
|
|
15
13
|
# and counts failures and totals.
|
|
16
14
|
#
|
|
17
15
|
# When any tool reaches max_failures (count >= max_failures), it sets env[:metadata][:tool_error_limit_reached]
|
|
18
|
-
# so the
|
|
16
|
+
# and env[:should_exit] (unless already set) so the agent loop can decide to stop.
|
|
19
17
|
#
|
|
20
18
|
# Also stores env[:metadata][:tool_calls] with the cumulative number of
|
|
21
19
|
# tool invocations in the current session.
|
|
@@ -44,6 +42,15 @@ module Brute
|
|
|
44
42
|
env[:metadata][:tool_errors] = @errors.dup
|
|
45
43
|
env[:metadata][:tool_error_limit_reached] = @errors.any? { |_, c| c >= @max_failures }
|
|
46
44
|
|
|
45
|
+
if env[:metadata][:tool_error_limit_reached]
|
|
46
|
+
failed_tool, fail_count = @errors.max_by { |_, c| c }
|
|
47
|
+
env[:should_exit] ||= {
|
|
48
|
+
reason: "tool_error_limit_reached",
|
|
49
|
+
message: "Tool '#{failed_tool}' has failed #{fail_count} times (limit: #{@max_failures}). Stopping.",
|
|
50
|
+
source: "ToolErrorTracking",
|
|
51
|
+
}
|
|
52
|
+
end
|
|
53
|
+
|
|
47
54
|
@app.call(env)
|
|
48
55
|
end
|
|
49
56
|
|
|
@@ -56,98 +63,117 @@ module Brute
|
|
|
56
63
|
end
|
|
57
64
|
end
|
|
58
65
|
|
|
59
|
-
|
|
60
|
-
require_relative "../../../spec/
|
|
61
|
-
|
|
62
|
-
RSpec.describe Brute::Middleware::ToolErrorTracking do
|
|
63
|
-
let(:response) { MockResponse.new(content: "tracked") }
|
|
64
|
-
let(:inner_app) { ->(_env) { response } }
|
|
65
|
-
let(:middleware) { described_class.new(inner_app, max_failures: 3) }
|
|
66
|
+
test do
|
|
67
|
+
require_relative "../../../spec/support/mock_provider"
|
|
68
|
+
require_relative "../../../spec/support/mock_response"
|
|
66
69
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
it "reports zero tool calls when tool_results is nil" do
|
|
74
|
-
env = build_env(tool_results: nil)
|
|
75
|
-
middleware.call(env)
|
|
76
|
-
|
|
77
|
-
expect(env[:metadata][:tool_calls]).to eq(0)
|
|
78
|
-
expect(env[:metadata][:tool_errors]).to eq({})
|
|
79
|
-
expect(env[:metadata][:tool_error_limit_reached]).to be false
|
|
80
|
-
end
|
|
70
|
+
def build_env(**overrides)
|
|
71
|
+
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
72
|
+
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
73
|
+
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
74
|
+
end
|
|
81
75
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
["fs_write", { success: true }],
|
|
87
|
-
]
|
|
88
|
-
env = build_env(tool_results: results)
|
|
89
|
-
middleware.call(env)
|
|
76
|
+
def make_middleware(app = nil)
|
|
77
|
+
app ||= ->(_env) { MockResponse.new(content: "tracked") }
|
|
78
|
+
Brute::Middleware::ToolErrorTracking.new(app, max_failures: 3)
|
|
79
|
+
end
|
|
90
80
|
|
|
91
|
-
|
|
92
|
-
|
|
81
|
+
it "passes the response through" do
|
|
82
|
+
response = MockResponse.new(content: "tracked")
|
|
83
|
+
app = ->(_env) { response }
|
|
84
|
+
result = make_middleware(app).call(build_env)
|
|
85
|
+
result.should == response
|
|
86
|
+
end
|
|
93
87
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
]
|
|
100
|
-
env = build_env(tool_results: results)
|
|
101
|
-
middleware.call(env)
|
|
88
|
+
it "reports zero tool calls when tool_results is nil" do
|
|
89
|
+
env = build_env(tool_results: nil)
|
|
90
|
+
make_middleware.call(env)
|
|
91
|
+
env[:metadata][:tool_calls].should == 0
|
|
92
|
+
end
|
|
102
93
|
|
|
103
|
-
|
|
104
|
-
|
|
94
|
+
it "reports empty tool errors when tool_results is nil" do
|
|
95
|
+
env = build_env(tool_results: nil)
|
|
96
|
+
make_middleware.call(env)
|
|
97
|
+
env[:metadata][:tool_errors].should == {}
|
|
98
|
+
end
|
|
105
99
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
]
|
|
112
|
-
env = build_env(tool_results: results)
|
|
113
|
-
middleware.call(env)
|
|
100
|
+
it "does not flag limit reached when tool_results is nil" do
|
|
101
|
+
env = build_env(tool_results: nil)
|
|
102
|
+
make_middleware.call(env)
|
|
103
|
+
env[:metadata][:tool_error_limit_reached].should.be.false
|
|
104
|
+
end
|
|
114
105
|
|
|
115
|
-
|
|
116
|
-
|
|
106
|
+
it "counts total tool calls from tool_results" do
|
|
107
|
+
results = [["fs_read", { content: "data" }], ["shell", { output: "ok" }], ["fs_write", { success: true }]]
|
|
108
|
+
env = build_env(tool_results: results)
|
|
109
|
+
make_middleware.call(env)
|
|
110
|
+
env[:metadata][:tool_calls].should == 3
|
|
111
|
+
end
|
|
117
112
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
middleware.call(env)
|
|
113
|
+
it "counts per-tool errors from results with error key" do
|
|
114
|
+
results = [["fs_read", { error: "not found" }], ["fs_read", { error: "denied" }], ["shell", { output: "ok" }]]
|
|
115
|
+
env = build_env(tool_results: results)
|
|
116
|
+
make_middleware.call(env)
|
|
117
|
+
env[:metadata][:tool_errors].should == { "fs_read" => 2 }
|
|
118
|
+
end
|
|
125
119
|
|
|
126
|
-
|
|
127
|
-
|
|
120
|
+
it "sets tool_error_limit_reached when a tool hits max_failures" do
|
|
121
|
+
results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }], ["fs_read", { error: "3" }]]
|
|
122
|
+
env = build_env(tool_results: results)
|
|
123
|
+
make_middleware.call(env)
|
|
124
|
+
env[:metadata][:tool_error_limit_reached].should.be.true
|
|
125
|
+
end
|
|
128
126
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
127
|
+
it "does not flag below the threshold" do
|
|
128
|
+
results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }]]
|
|
129
|
+
env = build_env(tool_results: results)
|
|
130
|
+
make_middleware.call(env)
|
|
131
|
+
env[:metadata][:tool_error_limit_reached].should.be.false
|
|
132
|
+
end
|
|
132
133
|
|
|
133
|
-
|
|
134
|
-
|
|
134
|
+
it "accumulates counts across multiple calls" do
|
|
135
|
+
mw = make_middleware
|
|
136
|
+
mw.call(build_env(tool_results: [["fs_read", { error: "fail" }]]))
|
|
137
|
+
env2 = build_env(tool_results: [["fs_read", { error: "again" }], ["shell", { output: "ok" }]])
|
|
138
|
+
mw.call(env2)
|
|
139
|
+
env2[:metadata][:tool_calls].should == 3
|
|
140
|
+
end
|
|
135
141
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
142
|
+
it "clears counters on reset!" do
|
|
143
|
+
mw = make_middleware
|
|
144
|
+
mw.call(build_env(tool_results: [["fs_read", { error: "fail" }]]))
|
|
145
|
+
mw.reset!
|
|
146
|
+
env2 = build_env(tool_results: nil)
|
|
147
|
+
mw.call(env2)
|
|
148
|
+
env2[:metadata][:tool_calls].should == 0
|
|
149
|
+
end
|
|
139
150
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
151
|
+
it "sets should_exit reason when error limit reached" do
|
|
152
|
+
results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }], ["fs_read", { error: "3" }]]
|
|
153
|
+
env = build_env(tool_results: results)
|
|
154
|
+
make_middleware.call(env)
|
|
155
|
+
env[:should_exit][:reason].should == "tool_error_limit_reached"
|
|
156
|
+
end
|
|
143
157
|
|
|
144
|
-
|
|
158
|
+
it "sets should_exit source to ToolErrorTracking" do
|
|
159
|
+
results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }], ["fs_read", { error: "3" }]]
|
|
160
|
+
env = build_env(tool_results: results)
|
|
161
|
+
make_middleware.call(env)
|
|
162
|
+
env[:should_exit][:source].should == "ToolErrorTracking"
|
|
163
|
+
end
|
|
145
164
|
|
|
146
|
-
|
|
147
|
-
|
|
165
|
+
it "does not set should_exit below the threshold" do
|
|
166
|
+
results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }]]
|
|
167
|
+
env = build_env(tool_results: results)
|
|
168
|
+
make_middleware.call(env)
|
|
169
|
+
env[:should_exit].should.be.nil
|
|
170
|
+
end
|
|
148
171
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
172
|
+
it "does not overwrite should_exit if already set" do
|
|
173
|
+
results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }], ["fs_read", { error: "3" }]]
|
|
174
|
+
existing = { reason: "doom_loop_detected", message: "loop", source: "DoomLoopDetection" }
|
|
175
|
+
env = build_env(tool_results: results, should_exit: existing)
|
|
176
|
+
make_middleware.call(env)
|
|
177
|
+
env[:should_exit][:reason].should == "doom_loop_detected"
|
|
152
178
|
end
|
|
153
179
|
end
|
|
@@ -1,24 +1,27 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
3
6
|
module Brute
|
|
4
7
|
module Middleware
|
|
5
8
|
# Guards against tool-only LLM responses where the assistant message
|
|
6
9
|
# is dropped from the context buffer.
|
|
7
10
|
#
|
|
8
11
|
# When the LLM responds with only tool_use blocks (no text), llm.rb's
|
|
9
|
-
# response adapter produces empty choices.
|
|
10
|
-
#
|
|
11
|
-
#
|
|
12
|
-
#
|
|
12
|
+
# response adapter produces empty choices. The assistant message carrying
|
|
13
|
+
# tool_use blocks may be lost. This causes "unexpected tool_use_id" on
|
|
14
|
+
# the next call because tool_result references a tool_use that's missing
|
|
15
|
+
# from the message history.
|
|
13
16
|
#
|
|
14
17
|
# This middleware runs post-call and ensures every pending tool_use ID
|
|
15
|
-
# is covered by an assistant message in
|
|
18
|
+
# is covered by an assistant message in env[:messages]. It handles three
|
|
16
19
|
# cases:
|
|
17
20
|
#
|
|
18
|
-
# 1.
|
|
19
|
-
# 2.
|
|
21
|
+
# 1. pending_functions is non-empty and the assistant message exists → no-op
|
|
22
|
+
# 2. pending_functions is non-empty but the assistant message is missing
|
|
20
23
|
# (or has different IDs) → inject synthetic message
|
|
21
|
-
# 3.
|
|
24
|
+
# 3. pending_functions is empty (nil-choice bug) but the stream recorded
|
|
22
25
|
# tool calls → inject synthetic message using stream metadata
|
|
23
26
|
#
|
|
24
27
|
class ToolUseGuard
|
|
@@ -29,32 +32,30 @@ module Brute
|
|
|
29
32
|
def call(env)
|
|
30
33
|
response = @app.call(env)
|
|
31
34
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
# stream's recorded metadata (fallback for nil-choice bug).
|
|
36
|
-
tool_data = collect_tool_data(ctx, env)
|
|
35
|
+
# Collect pending tool data from env[:pending_functions] (primary)
|
|
36
|
+
# or the stream's recorded metadata (fallback for nil-choice bug).
|
|
37
|
+
tool_data = collect_tool_data(env)
|
|
37
38
|
return response if tool_data.empty?
|
|
38
39
|
|
|
39
40
|
# Find all tool_use IDs already covered by assistant messages.
|
|
40
|
-
covered_ids = covered_tool_ids(
|
|
41
|
+
covered_ids = covered_tool_ids(env[:messages])
|
|
41
42
|
|
|
42
43
|
# Inject a synthetic assistant message for any uncovered tool calls.
|
|
43
44
|
uncovered = tool_data.reject { |td| covered_ids.include?(td[:id]) }
|
|
44
|
-
inject_synthetic!(
|
|
45
|
+
inject_synthetic!(env[:messages], uncovered) unless uncovered.empty?
|
|
45
46
|
|
|
46
47
|
response
|
|
47
48
|
end
|
|
48
49
|
|
|
49
50
|
private
|
|
50
51
|
|
|
51
|
-
def collect_tool_data(
|
|
52
|
-
functions =
|
|
52
|
+
def collect_tool_data(env)
|
|
53
|
+
functions = env[:pending_functions]
|
|
53
54
|
if functions && !functions.empty?
|
|
54
55
|
functions.map { |fn| { id: fn.id, name: fn.name, arguments: fn.arguments } }
|
|
55
56
|
elsif env[:streaming]
|
|
56
|
-
stream =
|
|
57
|
-
if stream
|
|
57
|
+
stream = env[:stream]
|
|
58
|
+
if stream&.respond_to?(:pending_tool_calls)
|
|
58
59
|
data = stream.pending_tool_calls.dup
|
|
59
60
|
stream.clear_pending_tool_calls!
|
|
60
61
|
data
|
|
@@ -66,19 +67,14 @@ module Brute
|
|
|
66
67
|
end
|
|
67
68
|
end
|
|
68
69
|
|
|
69
|
-
def
|
|
70
|
-
|
|
71
|
-
stream if stream.respond_to?(:pending_tool_calls)
|
|
72
|
-
end
|
|
73
|
-
|
|
74
|
-
def covered_tool_ids(ctx)
|
|
75
|
-
ctx.messages.to_a
|
|
70
|
+
def covered_tool_ids(messages)
|
|
71
|
+
messages
|
|
76
72
|
.select { |m| m.role.to_s == "assistant" && m.tool_call? }
|
|
77
73
|
.flat_map { |m| (m.extra.original_tool_calls || []).map { |tc| tc["id"] } }
|
|
78
74
|
.to_set
|
|
79
75
|
end
|
|
80
76
|
|
|
81
|
-
def inject_synthetic!(
|
|
77
|
+
def inject_synthetic!(messages, uncovered)
|
|
82
78
|
tool_calls = uncovered.map do |td|
|
|
83
79
|
LLM::Object.from(id: td[:id], name: td[:name], arguments: td[:arguments])
|
|
84
80
|
end
|
|
@@ -90,77 +86,48 @@ module Brute
|
|
|
90
86
|
tool_calls: tool_calls,
|
|
91
87
|
original_tool_calls: original_tool_calls,
|
|
92
88
|
})
|
|
93
|
-
|
|
89
|
+
messages << synthetic
|
|
94
90
|
end
|
|
95
91
|
end
|
|
96
92
|
end
|
|
97
93
|
end
|
|
98
94
|
|
|
99
|
-
|
|
100
|
-
require_relative "../../../spec/
|
|
101
|
-
|
|
102
|
-
RSpec.describe Brute::Middleware::ToolUseGuard do
|
|
103
|
-
let(:provider) { MockProvider.new }
|
|
104
|
-
|
|
105
|
-
# Helper: build a response that produces pending tool calls (functions) in the context.
|
|
106
|
-
def make_tool_response(tool_calls:)
|
|
107
|
-
MockResponse.new(content: "", tool_calls: tool_calls)
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
it "passes the response through when there are no pending functions" do
|
|
111
|
-
response = MockResponse.new(content: "no tools")
|
|
112
|
-
allow(provider).to receive(:complete).and_return(response)
|
|
113
|
-
|
|
114
|
-
ctx = LLM::Context.new(provider, tools: [])
|
|
115
|
-
prompt = ctx.prompt { |p| p.system("sys"); p.user("hi") }
|
|
116
|
-
|
|
117
|
-
inner_app = ->(_env) { ctx.talk(prompt); response }
|
|
118
|
-
middleware = described_class.new(inner_app)
|
|
119
|
-
env = build_env(context: ctx, provider: provider)
|
|
120
|
-
|
|
121
|
-
result = middleware.call(env)
|
|
122
|
-
expect(result).to eq(response)
|
|
123
|
-
end
|
|
124
|
-
|
|
125
|
-
it "does not inject a synthetic message when the assistant message already has tool_call?" do
|
|
126
|
-
tool_calls = [{ id: "toolu_1", name: "fs_read", arguments: { "path" => "test.rb" } }]
|
|
127
|
-
response = make_tool_response(tool_calls: tool_calls)
|
|
128
|
-
allow(provider).to receive(:complete).and_return(response)
|
|
95
|
+
test do
|
|
96
|
+
require_relative "../../../spec/support/mock_provider"
|
|
97
|
+
require_relative "../../../spec/support/mock_response"
|
|
129
98
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
env = build_env(context: ctx, provider: provider)
|
|
136
|
-
|
|
137
|
-
middleware.call(env)
|
|
138
|
-
|
|
139
|
-
messages = ctx.messages.to_a
|
|
140
|
-
assistant_msgs = messages.select { |m| m.role.to_s == "assistant" }
|
|
141
|
-
# Should only have the original assistant message, no synthetic
|
|
142
|
-
expect(assistant_msgs.size).to eq(1)
|
|
143
|
-
end
|
|
144
|
-
|
|
145
|
-
it "injects a synthetic assistant message when tool calls exist but assistant is missing" do
|
|
146
|
-
tool_calls = [{ id: "toolu_1", name: "fs_read", arguments: { "path" => "test.rb" } }]
|
|
147
|
-
response = MockResponse.new(content: "")
|
|
148
|
-
# Simulate the bug: choices[-1] is nil, so no assistant message stored
|
|
149
|
-
allow(response).to receive(:choices).and_return([nil])
|
|
150
|
-
allow(provider).to receive(:complete).and_return(response)
|
|
151
|
-
|
|
152
|
-
ctx = LLM::Context.new(provider, tools: [])
|
|
153
|
-
prompt = ctx.prompt { |p| p.system("sys"); p.user("read it") }
|
|
99
|
+
def build_env(**overrides)
|
|
100
|
+
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
101
|
+
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
102
|
+
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
103
|
+
end
|
|
154
104
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
105
|
+
it "passes the response through when there are no pending functions" do
|
|
106
|
+
response = MockResponse.new(content: "no tools")
|
|
107
|
+
inner_app = ->(_env) { response }
|
|
108
|
+
middleware = Brute::Middleware::ToolUseGuard.new(inner_app)
|
|
109
|
+
result = middleware.call(build_env(pending_functions: []))
|
|
110
|
+
result.should == response
|
|
111
|
+
end
|
|
159
112
|
|
|
160
|
-
|
|
161
|
-
|
|
113
|
+
it "injects a synthetic assistant message when tool calls exist but assistant is missing" do
|
|
114
|
+
fn = Struct.new(:id, :name, :arguments, keyword_init: true)
|
|
115
|
+
.new(id: "toolu_1", name: "fs_read", arguments: { "path" => "test.rb" })
|
|
116
|
+
response = MockResponse.new(content: "")
|
|
117
|
+
inner_app = ->(_env) { response }
|
|
118
|
+
middleware = Brute::Middleware::ToolUseGuard.new(inner_app)
|
|
119
|
+
env = build_env(messages: [], pending_functions: [fn])
|
|
120
|
+
lambda { middleware.call(env) }.should.not.raise
|
|
121
|
+
end
|
|
162
122
|
|
|
163
|
-
|
|
164
|
-
|
|
123
|
+
it "creates one assistant message for uncovered tool calls" do
|
|
124
|
+
fn = Struct.new(:id, :name, :arguments, keyword_init: true)
|
|
125
|
+
.new(id: "toolu_1", name: "fs_read", arguments: { "path" => "test.rb" })
|
|
126
|
+
response = MockResponse.new(content: "")
|
|
127
|
+
inner_app = ->(_env) { response }
|
|
128
|
+
middleware = Brute::Middleware::ToolUseGuard.new(inner_app)
|
|
129
|
+
env = build_env(messages: [], pending_functions: [fn])
|
|
130
|
+
middleware.call(env)
|
|
131
|
+
env[:messages].select { |m| m.role.to_s == "assistant" }.size.should == 1
|
|
165
132
|
end
|
|
166
133
|
end
|
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
require "brute"
|
|
6
|
-
end
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
7
5
|
|
|
8
6
|
module Brute
|
|
9
7
|
module Middleware
|
|
@@ -14,7 +12,7 @@ module Brute
|
|
|
14
12
|
# call. It also tracks total wall-clock time across all calls in a turn
|
|
15
13
|
# (including tool execution gaps between LLM calls).
|
|
16
14
|
#
|
|
17
|
-
# A new turn is detected when env[:tool_results] is nil (the
|
|
15
|
+
# A new turn is detected when env[:tool_results] is nil (the agent loop
|
|
18
16
|
# sets this on the first call of each run()).
|
|
19
17
|
#
|
|
20
18
|
# Stores in env[:metadata][:timing]:
|
|
@@ -41,7 +39,7 @@ module Brute
|
|
|
41
39
|
@total_llm_elapsed = 0.0
|
|
42
40
|
end
|
|
43
41
|
|
|
44
|
-
messages = env[:
|
|
42
|
+
messages = env[:messages]
|
|
45
43
|
@logger.debug("[brute] LLM call ##{@call_count} (#{messages.size} messages in context)")
|
|
46
44
|
|
|
47
45
|
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
@@ -67,68 +65,60 @@ module Brute
|
|
|
67
65
|
end
|
|
68
66
|
end
|
|
69
67
|
|
|
70
|
-
|
|
71
|
-
require_relative "../../../spec/
|
|
72
|
-
|
|
73
|
-
RSpec.describe Brute::Middleware::Tracing do
|
|
74
|
-
let(:response) { MockResponse.new(content: "traced response") }
|
|
75
|
-
let(:inner_app) { ->(_env) { response } }
|
|
76
|
-
let(:log_output) { StringIO.new }
|
|
77
|
-
let(:logger) { Logger.new(log_output) }
|
|
78
|
-
let(:middleware) { described_class.new(inner_app, logger: logger) }
|
|
79
|
-
|
|
80
|
-
it "passes the response through unchanged" do
|
|
81
|
-
env = build_env(tool_results: nil)
|
|
82
|
-
result = middleware.call(env)
|
|
83
|
-
expect(result).to eq(response)
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
it "populates env[:metadata][:timing] with all required keys" do
|
|
87
|
-
env = build_env(tool_results: nil)
|
|
88
|
-
middleware.call(env)
|
|
89
|
-
|
|
90
|
-
timing = env[:metadata][:timing]
|
|
91
|
-
expect(timing).to include(
|
|
92
|
-
:total_elapsed,
|
|
93
|
-
:total_llm_elapsed,
|
|
94
|
-
:llm_call_count,
|
|
95
|
-
:last_call_elapsed
|
|
96
|
-
)
|
|
97
|
-
expect(timing[:llm_call_count]).to eq(1)
|
|
98
|
-
expect(timing[:last_call_elapsed]).to be >= 0
|
|
99
|
-
expect(timing[:total_llm_elapsed]).to be >= 0
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
it "resets turn timing when tool_results is nil (new turn)" do
|
|
103
|
-
env = build_env(tool_results: nil)
|
|
104
|
-
middleware.call(env)
|
|
105
|
-
first_elapsed = env[:metadata][:timing][:total_llm_elapsed]
|
|
68
|
+
test do
|
|
69
|
+
require_relative "../../../spec/support/mock_provider"
|
|
70
|
+
require_relative "../../../spec/support/mock_response"
|
|
106
71
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
72
|
+
def build_env(**overrides)
|
|
73
|
+
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
74
|
+
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
75
|
+
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
76
|
+
end
|
|
110
77
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
78
|
+
it "passes the response through unchanged" do
|
|
79
|
+
response = MockResponse.new(content: "traced response")
|
|
80
|
+
inner_app = ->(_env) { response }
|
|
81
|
+
middleware = Brute::Middleware::Tracing.new(inner_app, logger: Logger.new(StringIO.new))
|
|
82
|
+
result = middleware.call(build_env(tool_results: nil))
|
|
83
|
+
result.should == response
|
|
84
|
+
end
|
|
114
85
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
86
|
+
it "populates timing with llm_call_count" do
|
|
87
|
+
response = MockResponse.new(content: "traced response")
|
|
88
|
+
inner_app = ->(_env) { response }
|
|
89
|
+
middleware = Brute::Middleware::Tracing.new(inner_app, logger: Logger.new(StringIO.new))
|
|
90
|
+
env = build_env(tool_results: nil)
|
|
91
|
+
middleware.call(env)
|
|
92
|
+
env[:metadata][:timing][:llm_call_count].should == 1
|
|
93
|
+
end
|
|
121
94
|
|
|
122
|
-
|
|
123
|
-
|
|
95
|
+
it "populates timing with non-negative last_call_elapsed" do
|
|
96
|
+
response = MockResponse.new(content: "traced response")
|
|
97
|
+
inner_app = ->(_env) { response }
|
|
98
|
+
middleware = Brute::Middleware::Tracing.new(inner_app, logger: Logger.new(StringIO.new))
|
|
99
|
+
env = build_env(tool_results: nil)
|
|
100
|
+
middleware.call(env)
|
|
101
|
+
(env[:metadata][:timing][:last_call_elapsed] >= 0).should.be.true
|
|
102
|
+
end
|
|
124
103
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
104
|
+
it "accumulates call count across multiple calls" do
|
|
105
|
+
response = MockResponse.new(content: "traced response")
|
|
106
|
+
inner_app = ->(_env) { response }
|
|
107
|
+
middleware = Brute::Middleware::Tracing.new(inner_app, logger: Logger.new(StringIO.new))
|
|
108
|
+
env = build_env(tool_results: nil)
|
|
109
|
+
middleware.call(env)
|
|
110
|
+
env[:tool_results] = [["read", {}]]
|
|
111
|
+
middleware.call(env)
|
|
112
|
+
middleware.call(env)
|
|
113
|
+
env[:metadata][:timing][:llm_call_count].should == 3
|
|
114
|
+
end
|
|
128
115
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
116
|
+
it "logs LLM call and response messages" do
|
|
117
|
+
response = MockResponse.new(content: "traced response")
|
|
118
|
+
inner_app = ->(_env) { response }
|
|
119
|
+
log_output = StringIO.new
|
|
120
|
+
middleware = Brute::Middleware::Tracing.new(inner_app, logger: Logger.new(log_output))
|
|
121
|
+
middleware.call(build_env(tool_results: nil))
|
|
122
|
+
log_output.string.should =~ /LLM call #1/
|
|
133
123
|
end
|
|
134
124
|
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
require_relative 'middleware/base'
|
|
2
|
+
require_relative 'middleware/llm_call'
|
|
3
|
+
require_relative 'middleware/retry'
|
|
4
|
+
require_relative 'middleware/doom_loop_detection'
|
|
5
|
+
require_relative 'middleware/token_tracking'
|
|
6
|
+
require_relative 'middleware/compaction_check'
|
|
7
|
+
require_relative 'middleware/session_persistence'
|
|
8
|
+
require_relative 'middleware/message_tracking'
|
|
9
|
+
require_relative 'middleware/tracing'
|
|
10
|
+
require_relative 'middleware/tool_error_tracking'
|
|
11
|
+
require_relative 'middleware/reasoning_normalizer'
|
|
12
|
+
require_relative "middleware/tool_use_guard"
|
|
13
|
+
require_relative "middleware/otel"
|
|
14
|
+
|
|
15
|
+
module Brute
|
|
16
|
+
module Middleware
|
|
17
|
+
end
|
|
18
|
+
end
|