brute 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/brute/agent.rb +72 -6
- data/lib/brute/events/handler.rb +69 -0
- data/lib/brute/events/prefixed_terminal_output.rb +72 -0
- data/lib/brute/events/terminal_output_handler.rb +68 -0
- data/lib/brute/middleware/001_otel_span.rb +77 -0
- data/lib/brute/middleware/003_tool_result_loop.rb +103 -0
- data/lib/brute/middleware/004_summarize.rb +139 -0
- data/lib/brute/middleware/005_tracing.rb +86 -0
- data/lib/brute/middleware/010_max_iterations.rb +73 -0
- data/lib/brute/middleware/015_otel_token_usage.rb +42 -0
- data/lib/brute/middleware/020_system_prompt.rb +128 -0
- data/lib/brute/middleware/040_compaction_check.rb +155 -0
- data/lib/brute/middleware/060_questions.rb +41 -0
- data/lib/brute/middleware/070_tool_call.rb +247 -0
- data/lib/brute/middleware/073_otel_tool_call.rb +49 -0
- data/lib/brute/middleware/075_otel_tool_results.rb +46 -0
- data/lib/brute/middleware/100_llm_call.rb +62 -0
- data/lib/brute/middleware/event_handler.rb +25 -0
- data/lib/brute/middleware/user_queue.rb +35 -0
- data/lib/brute/pipeline.rb +44 -107
- data/lib/brute/prompts/skills.rb +2 -2
- data/lib/brute/prompts.rb +23 -23
- data/lib/brute/providers/shell.rb +6 -19
- data/lib/brute/providers/shell_response.rb +22 -30
- data/lib/brute/session.rb +52 -0
- data/lib/brute/store/snapshot_store.rb +21 -37
- data/lib/brute/sub_agent.rb +106 -0
- data/lib/brute/system_prompt.rb +1 -83
- data/lib/brute/tool.rb +107 -0
- data/lib/brute/tools/delegate.rb +61 -70
- data/lib/brute/tools/fs_patch.rb +9 -7
- data/lib/brute/tools/fs_read.rb +233 -20
- data/lib/brute/tools/fs_remove.rb +8 -9
- data/lib/brute/tools/fs_search.rb +98 -16
- data/lib/brute/tools/fs_undo.rb +8 -8
- data/lib/brute/tools/fs_write.rb +7 -5
- data/lib/brute/tools/net_fetch.rb +8 -8
- data/lib/brute/tools/question.rb +36 -24
- data/lib/brute/tools/shell.rb +74 -16
- data/lib/brute/tools/todo_read.rb +8 -8
- data/lib/brute/tools/todo_write.rb +25 -18
- data/lib/brute/tools.rb +8 -12
- data/lib/brute/truncation.rb +219 -0
- data/lib/brute/version.rb +1 -1
- data/lib/brute.rb +82 -45
- metadata +59 -46
- data/lib/brute/loop/agent_stream.rb +0 -118
- data/lib/brute/loop/agent_turn.rb +0 -520
- data/lib/brute/loop/compactor.rb +0 -107
- data/lib/brute/loop/doom_loop.rb +0 -86
- data/lib/brute/loop/step.rb +0 -332
- data/lib/brute/loop/tool_call_step.rb +0 -90
- data/lib/brute/middleware/base.rb +0 -27
- data/lib/brute/middleware/compaction_check.rb +0 -106
- data/lib/brute/middleware/doom_loop_detection.rb +0 -136
- data/lib/brute/middleware/llm_call.rb +0 -128
- data/lib/brute/middleware/message_tracking.rb +0 -339
- data/lib/brute/middleware/otel/span.rb +0 -105
- data/lib/brute/middleware/otel/token_usage.rb +0 -68
- data/lib/brute/middleware/otel/tool_calls.rb +0 -68
- data/lib/brute/middleware/otel/tool_results.rb +0 -65
- data/lib/brute/middleware/otel.rb +0 -34
- data/lib/brute/middleware/reasoning_normalizer.rb +0 -192
- data/lib/brute/middleware/retry.rb +0 -157
- data/lib/brute/middleware/session_persistence.rb +0 -72
- data/lib/brute/middleware/token_tracking.rb +0 -124
- data/lib/brute/middleware/tool_error_tracking.rb +0 -179
- data/lib/brute/middleware/tool_use_guard.rb +0 -133
- data/lib/brute/middleware/tracing.rb +0 -124
- data/lib/brute/middleware.rb +0 -18
- data/lib/brute/orchestrator/turn.rb +0 -105
- data/lib/brute/patches/anthropic_tool_role.rb +0 -35
- data/lib/brute/patches/buffer_nil_guard.rb +0 -26
- data/lib/brute/providers/models_dev.rb +0 -111
- data/lib/brute/providers/ollama.rb +0 -135
- data/lib/brute/providers/opencode_go.rb +0 -43
- data/lib/brute/providers/opencode_zen.rb +0 -87
- data/lib/brute/providers.rb +0 -62
- data/lib/brute/queue/base_queue.rb +0 -222
- data/lib/brute/queue/parallel_queue.rb +0 -66
- data/lib/brute/queue/sequential_queue.rb +0 -63
- data/lib/brute/store/message_store.rb +0 -362
- data/lib/brute/store/session.rb +0 -106
- /data/lib/brute/{diff.rb → utils/diff.rb} +0 -0
data/lib/brute/loop/step.rb
DELETED
|
@@ -1,332 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "bundler/setup"
|
|
4
|
-
require "brute"
|
|
5
|
-
|
|
6
|
-
require "securerandom"
|
|
7
|
-
require "async"
|
|
8
|
-
|
|
9
|
-
module Brute
  module Loop
    # A first-class work object with identity, state, result/error capture,
    # optional sub-queue, and cancellation.
    #
    # Users subclass Step and override #perform(task). The framework calls
    # #call(task), which owns the state machine — subclasses never touch
    # state transitions directly.
    #
    # State machine:
    #
    #                           ┌──> completed
    #                           │
    #     pending ──> running ──┼──> failed
    #        │                  │
    #        └──> cancelled     └──> cancelled
    #
    # Three terminal states, two non-terminal. Once terminal, stays terminal:
    # #call only does work while the step is still pending.
    #
    class Step
      STATES = %i[pending running completed failed cancelled].freeze

      attr_reader :id

      # @param id [String, nil] explicit identity; one is generated when nil
      # @param attributes [Hash] any further keywords, stored in @attributes
      def initialize(id: nil, **attributes)
        @id = id || self.class.generate_id
        @attributes = attributes
        @state = :pending
        @result = nil
        @error = nil
        @task = nil
        @jobs = nil
        @mutex = Mutex.new
      end

      # Builds an id from the class name, process id, current thread's
      # object id and four random bytes (hex-encoded).
      #
      # @return [String]
      def self.generate_id
        "#{name}-#{Process.pid}-#{Thread.current.object_id}-#{SecureRandom.hex(4)}"
      end

      # Called by the queue's worker. Subclasses override #perform instead.
      #
      # Does nothing unless the step is still pending. Otherwise runs
      # #perform and records the outcome:
      #
      # * normal return      -> :completed, return value stored as #result
      # * Async::Cancel      -> :cancelled, sub-queue cancelled, re-raised
      # * any StandardError  -> :failed, exception stored as #error, NOT re-raised
      #
      # @param task [Object] the task this step runs under; kept while running
      # @return [nil]
      def call(task)
        return unless transition_to_running(task)

        begin
          result = perform(task)
          @mutex.synchronize do
            @result = result
            @state = :completed
            @task = nil
          end

        rescue Async::Cancel
          # Cascade to sub-queue before we lose the reference:
          @jobs&.cancel
          @mutex.synchronize do
            @state = :cancelled
            @task = nil
          end
          raise

        rescue => error
          # Continue-on-failure: record the error, do NOT re-raise.
          @mutex.synchronize do
            @error = error
            @state = :failed
            @task = nil
          end
        end
      end

      # Subclasses override this. The default raises a plain RuntimeError,
      # which #call's `rescue => error` records as a failure.
      #
      # @param task [Object] the task passed to #call
      # @raise [RuntimeError] always, in the base class
      def perform(task)
        raise "#{self.class}#perform not implemented"
      end

      # Lazy accessor — creates the sub-queue parented to our running task.
      # Only valid while the step is running (inside #perform). The queue is
      # memoized, so `type` only matters on the first call.
      #
      # @param type [Class] queue class, built with `type.new(parent: task).start`
      # @raise [RuntimeError] when the step has no running task
      def jobs(type: Brute::Queue::SequentialQueue)
        @mutex.synchronize do
          raise "Step not running; sub-queue has nothing to parent to" unless @task
          @jobs ||= type.new(parent: @task).start
        end
      end

      # @return [Symbol] current state, one of STATES
      def state
        @mutex.synchronize { @state }
      end

      # @return [Object, nil] value returned by #perform, once completed
      def result
        @mutex.synchronize { @result }
      end

      # @return [StandardError, nil] exception raised by #perform, once failed
      def error
        @mutex.synchronize { @error }
      end

      # @return [Hash] snapshot with :id, :state, :result and :error, read
      #   under a single lock acquisition
      def status
        @mutex.synchronize do
          { id: @id, state: @state, result: @result, error: @error }
        end
      end

      # Requests cancellation.
      #
      # A pending step is marked :cancelled immediately. A running step has
      # its task cancelled (outside the lock); the state change then happens
      # in #call's Async::Cancel handler.
      #
      # @return [Boolean] false when the step had already finished, else true
      def cancel
        task = @mutex.synchronize do
          case @state
          when :pending
            @state = :cancelled
            nil
          when :running
            @task
          else
            return false # already finished
          end
        end

        task&.cancel
        true
      end

      private

      # Moves pending -> running and remembers the task.
      #
      # Only a pending step may start. Rejecting every other state (not just
      # :cancelled) keeps terminal states terminal and prevents a step that is
      # already running from being started a second time.
      #
      # @return [Boolean] true when the transition happened
      def transition_to_running(task)
        @mutex.synchronize do
          return false unless @state == :pending
          @state = :running
          @task = task
          true
        end
      end
    end
  end
end
|
|
143
|
-
|
|
144
|
-
test do
  # Fixture: finishes immediately with a fixed value.
  class HelloStep < Brute::Loop::Step
    def perform(task)
      "hello"
    end
  end

  # Fixture: always raises.
  class FailStep < Brute::Loop::Step
    def perform(task)
      raise "boom"
    end
  end

  # Fixture: sleeps before returning.
  class SlowStep < Brute::Loop::Step
    def perform(task)
      sleep 10
      "done"
    end
  end

  # Runs a step on the current Async task and hands the same step back.
  executed = ->(step) { step.tap { |s| s.call(Async::Task.current) } }

  # -- identity --

  it "generates a unique id" do
    generated = HelloStep.new.id
    generated.should.be.kind_of String
  end

  it "accepts a custom id" do
    named = HelloStep.new(id: "custom-1")
    named.id.should == "custom-1"
  end

  # -- initial state --

  it "starts in pending state" do
    fresh = HelloStep.new
    fresh.state.should == :pending
  end

  it "starts with nil result" do
    fresh = HelloStep.new
    fresh.result.should.be.nil
  end

  it "starts with nil error" do
    fresh = HelloStep.new
    fresh.error.should.be.nil
  end

  # -- successful execution --

  it "transitions to completed on success" do
    Sync { executed.call(HelloStep.new).state.should == :completed }
  end

  it "captures the return value as result" do
    Sync { executed.call(HelloStep.new).result.should == "hello" }
  end

  # -- failed execution --

  it "transitions to failed on error" do
    Sync { executed.call(FailStep.new).state.should == :failed }
  end

  it "captures the exception as error" do
    Sync { executed.call(FailStep.new).error.message.should == "boom" }
  end

  it "does not re-raise on failure" do
    Sync do
      failing = FailStep.new
      -> { failing.call(Async::Task.current) }.should.not.raise
    end
  end

  # -- cancellation of pending step --

  it "cancel returns true for pending step" do
    outcome = HelloStep.new.cancel
    outcome.should.be.true
  end

  it "transitions pending step to cancelled" do
    pending_step = HelloStep.new
    pending_step.cancel
    pending_step.state.should == :cancelled
  end

  it "skips perform when cancelled before call" do
    Sync do
      cancelled = HelloStep.new
      cancelled.cancel
      executed.call(cancelled).result.should.be.nil
    end
  end

  # -- cancellation of finished step --

  it "cancel returns false for completed step" do
    Sync { executed.call(HelloStep.new).cancel.should.be.false }
  end

  it "cancel returns false for failed step" do
    Sync { executed.call(FailStep.new).cancel.should.be.false }
  end

  # -- status --

  it "status includes id" do
    snapshot = HelloStep.new(id: "s1").status
    snapshot[:id].should == "s1"
  end

  it "status includes state" do
    snapshot = HelloStep.new.status
    snapshot[:state].should == :pending
  end

  # -- perform not implemented --

  it "raises NotImplementedError for base Step" do
    Sync { executed.call(Brute::Loop::Step.new).state.should == :failed }
  end

  # -- jobs raises when not running --

  it "raises when accessing jobs outside perform" do
    -> { HelloStep.new.jobs(type: Array) }.should.raise(RuntimeError)
  end

  # -- attributes stored --

  it "stores attributes" do
    stored = HelloStep.new(url: "https://example.com").instance_variable_get(:@attributes)
    stored[:url].should == "https://example.com"
  end

  # -- nested sub-queue --

  describe "nesting" do
    # Fixture: enqueues three HelloSteps on its sub-queue and returns their results.
    class ParentStep < Brute::Loop::Step
      def perform(task)
        queue = jobs(type: Brute::Queue::SequentialQueue)
        3.times { |index| queue << HelloStep.new(id: "child-#{index}") }
        queue.drain
        queue.steps.map(&:result)
      end
    end

    it "creates a sub-queue inside perform" do
      Sync { executed.call(ParentStep.new).result.should == ["hello", "hello", "hello"] }
    end

    it "sub-steps all complete" do
      Sync do
        children = executed.call(ParentStep.new).instance_variable_get(:@jobs).steps
        children.all? { |child| child.state == :completed }.should.be.true
      end
    end
  end
end
|
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "bundler/setup"
|
|
4
|
-
require "brute"
|
|
5
|
-
|
|
6
|
-
module Brute
  module Loop
    # Step adapter around a single LLM::Function tool call.
    #
    # The step's id is the function's call id, which lets a tool result be
    # matched back to the request the LLM made.
    #
    class ToolCallStep < Step
      attr_reader :function

      # @param function [#id, #call] the tool call to wrap
      # @param rest [Hash] remaining keywords, forwarded to Step#initialize
      def initialize(function:, **rest)
        super(id: function.id, **rest)
        @function = function
      end

      # Invokes the wrapped function; its return value becomes the step result.
      #
      # @param task [Object] unused here
      def perform(task)
        function.call
      end
    end
  end
end
|
|
27
|
-
|
|
28
|
-
test do
  # Stand-in for an LLM function whose #call returns a canned value.
  FakeFunction = Struct.new(:id, :name, :arguments, :return_value) do
    def call
      return_value
    end
  end

  # Stand-in for an LLM function whose #call always raises.
  class FailFunction
    attr_reader :id, :name, :arguments

    def initialize
      @id = "fail_1"
      @name = "fail"
      @arguments = {}
    end

    def call
      raise "tool exploded"
    end
  end

  # Wraps a function in a ToolCallStep, runs it on the current Async task,
  # and returns the step.
  executed = lambda do |function|
    Brute::Loop::ToolCallStep.new(function: function).tap { |s| s.call(Async::Task.current) }
  end

  it "uses function id as step id" do
    reader = FakeFunction.new("call_123", "read", {}, "content")
    wrapped = Brute::Loop::ToolCallStep.new(function: reader)
    wrapped.id.should == "call_123"
  end

  it "calls the function in perform" do
    Sync do
      reader = FakeFunction.new("call_1", "read", {}, "file contents")
      executed.call(reader).result.should == "file contents"
    end
  end

  it "transitions to completed on success" do
    Sync do
      writer = FakeFunction.new("call_2", "write", {}, "ok")
      executed.call(writer).state.should == :completed
    end
  end

  it "captures function as accessor" do
    shell = FakeFunction.new("call_3", "shell", {}, nil)
    wrapped = Brute::Loop::ToolCallStep.new(function: shell)
    wrapped.function.should.be.identical_to shell
  end

  it "transitions to failed when function raises" do
    Sync { executed.call(FailFunction.new).state.should == :failed }
  end

  it "captures function error" do
    Sync { executed.call(FailFunction.new).error.message.should == "tool exploded" }
  end
end
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Brute
  module Middleware
    # Common superclass for middleware, following the Rack-style shape:
    #
    #   def call(env)
    #     # work before the inner app runs
    #     response = @app.call(env)
    #     # work after the inner app runs
    #     response
    #   end
    #
    # A subclass is expected to invoke @app.call(env) itself, except when it
    # deliberately short-circuits the chain (for example to hand back a
    # cached response).
    #
    class Base
      # @param app [#call] the next link in the chain
      def initialize(app)
        @app = app
      end

      # Default behaviour: pass env straight to the wrapped app and return
      # whatever it returns.
      #
      # @param env [Object] forwarded untouched
      def call(env)
        @app.call(env)
      end
    end
  end
end
|
|
@@ -1,106 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "bundler/setup"
|
|
4
|
-
require "brute"
|
|
5
|
-
|
|
6
|
-
# Time#iso8601 lives in the "time" standard library on Ruby < 3.4.
require "time"

module Brute
  module Middleware
    # Checks context size after each LLM call and triggers compaction
    # when thresholds are exceeded.
    #
    # Runs POST-call: inspects message count and token usage. If compaction
    # is needed, summarizes older messages and replaces env[:messages] with
    # the summary so the next LLM call starts with a compact history.
    #
    class CompactionCheck < Base
      # @param app [#call] next middleware in the chain
      # @param compactor [#should_compact?, #compact] decides on and performs
      #   the summarization
      # @param system_prompt [String] content of the system message that
      #   heads the compacted history
      def initialize(app, compactor:, system_prompt:)
        super(app)
        @compactor = compactor
        @system_prompt = system_prompt
      end

      # Runs the inner app first, then compacts env[:messages] if the
      # compactor asks for it.
      #
      # @param env [Hash] reads :messages and :metadata; may replace
      #   :messages and set env[:metadata][:compaction]
      # @return [Object] the inner app's response, unchanged
      def call(env)
        response = @app.call(env)
        compact_history(env)
        response
      end

      private

      # Replaces the history with [system prompt, summary] when the compactor
      # both requests compaction and returns a result. Leaves env untouched
      # otherwise.
      def compact_history(env)
        messages = env[:messages]
        usage = env[:metadata].dig(:tokens, :last_call)
        return unless @compactor.should_compact?(messages, usage: usage)

        result = @compactor.compact(messages)
        return unless result

        # NOTE(review): the second element of the compactor result is
        # ignored; only the summary text is carried forward — confirm
        # this is intended.
        summary_text, _recent = result
        env[:metadata][:compaction] = {
          messages_before: messages.size,
          timestamp: Time.now.iso8601,
        }
        # Replace the message history with the summary
        env[:messages] = [
          LLM::Message.new(:system, @system_prompt),
          LLM::Message.new(:user, "[Previous conversation summary]\n\n#{summary_text}"),
        ]
      end
    end
  end
end
|
|
49
|
-
|
|
50
|
-
test do
  require_relative "../../../spec/support/mock_provider"
  require_relative "../../../spec/support/mock_response"

  # Default pipeline env; any keyword overrides the matching default.
  def build_env(**overrides)
    defaults = {
      provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
      messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
      tool_results: nil, streaming: false, should_exit: nil, pending_functions: []
    }
    defaults.merge(overrides)
  end

  # Stub compactor with canned answers for should_compact? and compact.
  def make_compactor(should: false, result: nil)
    stub = Object.new
    stub.define_singleton_method(:should_compact?) { |_msgs, **_| should }
    stub.define_singleton_method(:compact) { |_msgs| result }
    stub
  end

  # CompactionCheck around `app`, using the "sys" system prompt.
  def compaction_middleware(compactor, app: ->(_env) { MockResponse.new })
    Brute::Middleware::CompactionCheck.new(app, compactor: compactor, system_prompt: "sys")
  end

  it "passes the response through when compaction is not needed" do
    expected = MockResponse.new(content: "compaction response")
    middleware = compaction_middleware(make_compactor(should: false), app: ->(_env) { expected })
    middleware.call(build_env).should == expected
  end

  it "does not set compaction metadata when not needed" do
    env = build_env
    compaction_middleware(make_compactor(should: false)).call(env)
    env[:metadata][:compaction].should.be.nil
  end

  it "replaces messages with summary when compaction triggers" do
    history = [LLM::Message.new(:user, "hello"), LLM::Message.new(:assistant, "hi"), LLM::Message.new(:user, "how")]
    env = build_env(messages: history)
    compaction_middleware(make_compactor(should: true, result: ["Summary of conversation", []])).call(env)
    env[:metadata][:compaction][:messages_before].should == 3
  end

  it "creates two messages after compaction" do
    env = build_env(messages: [LLM::Message.new(:user, "hello")])
    compaction_middleware(make_compactor(should: true, result: ["Summary", []])).call(env)
    env[:messages].size.should == 2
  end

  it "handles compactor returning nil gracefully" do
    env = build_env(messages: [LLM::Message.new(:user, "hello")])
    compaction_middleware(make_compactor(should: true, result: nil)).call(env)
    env[:metadata][:compaction].should.be.nil
  end
end
|
|
@@ -1,136 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "bundler/setup"
|
|
4
|
-
require "brute"
|
|
5
|
-
|
|
6
|
-
module Brute
  module Middleware
    # Watches for an agent that keeps repeating the same tool call pattern
    # and, when it finds one, adds a corrective warning to the history ahead
    # of the next LLM call.
    #
    # Runs PRE-call: the detector inspects env[:messages]; on a hit the
    # warning is appended as a user message, so the LLM receives it together
    # with the regular tool results.
    #
    class DoomLoopDetection < Base
      # @param app [#call] next middleware in the chain
      # @param threshold [Integer] passed to Brute::Loop::DoomLoopDetector
      def initialize(app, threshold: 3)
        super(app)
        @detector = Brute::Loop::DoomLoopDetector.new(threshold: threshold)
      end

      # Flags a detected loop on env, then always continues down the chain.
      #
      # @param env [Hash] reads :messages; on detection appends to :messages,
      #   sets env[:metadata][:doom_loop_detected] and may set :should_exit
      # @return [Object] the inner app's response
      def call(env)
        repetitions = @detector.detect(env[:messages])
        flag_doom_loop(env, repetitions) if repetitions
        @app.call(env)
      end

      private

      # Records the detection on env.
      def flag_doom_loop(env, repetitions)
        warning = @detector.warning_message(repetitions)
        # The warning goes in as a user message so the LLM sees it.
        env[:messages] << LLM::Message.new(:user, warning)
        env[:metadata][:doom_loop_detected] = repetitions

        # Ask the agent loop to stop once this LLM call has completed.
        # First writer wins: an exit request already set elsewhere is kept.
        env[:should_exit] ||= {
          reason: "doom_loop_detected",
          message: "Agent is stuck repeating the same tool calls (#{repetitions} repetitions).",
          source: "DoomLoopDetection",
        }
      end
    end
  end
end
|
|
44
|
-
|
|
45
|
-
test do
  require_relative "../../../spec/support/mock_provider"
  require_relative "../../../spec/support/mock_response"

  # Default pipeline env; any keyword overrides the matching default.
  def build_env(**overrides)
    defaults = {
      provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
      messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
      tool_results: nil, streaming: false, should_exit: nil, pending_functions: []
    }
    defaults.merge(overrides)
  end

  FakeFunc = Struct.new(:name, :arguments, keyword_init: true)

  # Assistant message whose #functions returns the given list.
  def assistant_msg_with_functions(function_list)
    LLM::Message.new(:assistant, "tool msg", {}).tap do |message|
      message.define_singleton_method(:functions) { function_list }
    end
  end

  # `count` assistant messages that all carry the same fs_read call.
  def repeated_read_messages(count)
    read_call = FakeFunc.new(name: "fs_read", arguments: '{"path":"x.rb"}')
    count.times.map { assistant_msg_with_functions([read_call]) }
  end

  # DoomLoopDetection (threshold 3) around an app returning a mock response.
  def doom_loop_middleware
    inner_app = ->(_env) { MockResponse.new(content: "loop check") }
    Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
  end

  it "passes through when no doom loop is detected" do
    env = build_env
    doom_loop_middleware.call(env)
    env[:metadata][:doom_loop_detected].should.be.nil
  end

  it "detects consecutive identical tool calls" do
    history = repeated_read_messages(4)
    env = build_env(messages: history)
    doom_loop_middleware.call(env)
    env[:metadata][:doom_loop_detected].should.not.be.nil
  end

  it "does not trigger below the threshold" do
    history = repeated_read_messages(2)
    env = build_env(messages: history)
    doom_loop_middleware.call(env)
    env[:metadata][:doom_loop_detected].should.be.nil
  end

  it "sets should_exit reason when doom loop detected" do
    history = repeated_read_messages(4)
    env = build_env(messages: history)
    doom_loop_middleware.call(env)
    env[:should_exit][:reason].should == "doom_loop_detected"
  end

  it "does not set should_exit when no loop detected" do
    env = build_env
    doom_loop_middleware.call(env)
    env[:should_exit].should.be.nil
  end

  it "does not overwrite should_exit if already set" do
    history = repeated_read_messages(4)
    earlier = { reason: "other", message: "earlier", source: "Other" }
    env = build_env(messages: history, should_exit: earlier)
    doom_loop_middleware.call(env)
    env[:should_exit][:reason].should == "other"
  end

  it "appends a warning message when loop detected" do
    history = repeated_read_messages(4)
    env = build_env(messages: history)
    size_before = env[:messages].size
    doom_loop_middleware.call(env)
    env[:messages].size.should == size_before + 1
  end

  it "generates warning message with repetition count" do
    warning = Brute::Loop::DoomLoopDetector.new(threshold: 3).warning_message(5)
    warning.should =~ /5 times/
  end
end
|