brute 0.4.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/brute/agent.rb +14 -0
- data/lib/brute/diff.rb +18 -28
- data/lib/brute/loop/agent_stream.rb +118 -0
- data/lib/brute/loop/agent_turn.rb +520 -0
- data/lib/brute/{compactor.rb → loop/compactor.rb} +2 -0
- data/lib/brute/{doom_loop.rb → loop/doom_loop.rb} +2 -0
- data/lib/brute/loop/step.rb +332 -0
- data/lib/brute/loop/tool_call_step.rb +90 -0
- data/lib/brute/middleware/compaction_check.rb +60 -146
- data/lib/brute/middleware/doom_loop_detection.rb +95 -92
- data/lib/brute/middleware/llm_call.rb +78 -80
- data/lib/brute/middleware/message_tracking.rb +115 -162
- data/lib/brute/middleware/otel/span.rb +25 -106
- data/lib/brute/middleware/otel/token_usage.rb +29 -84
- data/lib/brute/middleware/otel/tool_calls.rb +23 -107
- data/lib/brute/middleware/otel/tool_results.rb +22 -86
- data/lib/brute/middleware/reasoning_normalizer.rb +78 -103
- data/lib/brute/middleware/retry.rb +95 -76
- data/lib/brute/middleware/session_persistence.rb +38 -37
- data/lib/brute/middleware/token_tracking.rb +64 -63
- data/lib/brute/middleware/tool_error_tracking.rb +108 -82
- data/lib/brute/middleware/tool_use_guard.rb +57 -90
- data/lib/brute/middleware/tracing.rb +53 -63
- data/lib/brute/middleware.rb +18 -0
- data/lib/brute/orchestrator/turn.rb +105 -0
- data/lib/brute/pipeline.rb +77 -133
- data/lib/brute/prompts/build_switch.rb +21 -25
- data/lib/brute/prompts/environment.rb +31 -35
- data/lib/brute/prompts/identity.rb +22 -29
- data/lib/brute/prompts/instructions.rb +15 -18
- data/lib/brute/prompts/max_steps.rb +18 -25
- data/lib/brute/prompts/plan_reminder.rb +18 -26
- data/lib/brute/prompts/skills.rb +8 -30
- data/lib/brute/prompts.rb +28 -0
- data/lib/brute/providers/ollama.rb +135 -0
- data/lib/brute/providers/shell.rb +2 -2
- data/lib/brute/providers/shell_response.rb +2 -2
- data/lib/brute/providers.rb +62 -0
- data/lib/brute/queue/base_queue.rb +222 -0
- data/lib/brute/{file_mutation_queue.rb → queue/file_mutation_queue.rb} +28 -26
- data/lib/brute/queue/parallel_queue.rb +66 -0
- data/lib/brute/queue/sequential_queue.rb +63 -0
- data/lib/brute/store/message_store.rb +362 -0
- data/lib/brute/store/session.rb +106 -0
- data/lib/brute/{snapshot_store.rb → store/snapshot_store.rb} +2 -0
- data/lib/brute/{todo_store.rb → store/todo_store.rb} +2 -0
- data/lib/brute/system_prompt.rb +81 -194
- data/lib/brute/tools/delegate.rb +46 -116
- data/lib/brute/tools/fs_patch.rb +36 -37
- data/lib/brute/tools/fs_remove.rb +2 -2
- data/lib/brute/tools/fs_undo.rb +2 -2
- data/lib/brute/tools/fs_write.rb +29 -41
- data/lib/brute/tools/todo_read.rb +1 -1
- data/lib/brute/tools/todo_write.rb +1 -1
- data/lib/brute/tools.rb +31 -0
- data/lib/brute/version.rb +1 -1
- data/lib/brute.rb +40 -204
- metadata +31 -20
- data/lib/brute/agent_stream.rb +0 -181
- data/lib/brute/hooks.rb +0 -84
- data/lib/brute/message_store.rb +0 -463
- data/lib/brute/orchestrator.rb +0 -550
- data/lib/brute/session.rb +0 -161
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
6
|
+
require "securerandom"
|
|
7
|
+
require "async"
|
|
8
|
+
|
|
9
|
+
module Brute
|
|
10
|
+
module Loop
|
|
11
|
+
# A first-class work object with identity, state, result/error capture,
# optional sub-queue, and cancellation.
#
# Users subclass Step and override #perform(task). The framework calls
# #call(task) which owns the state machine — subclasses never touch
# state transitions directly.
#
# State machine:
#
#                         ┌──> completed
#                         │
#   pending ──> running ──┤
#     │                   │
#     │                   ├──> failed
#     │                   │
#     └──> cancelled      └──> cancelled
#
# Three terminal states. Two non-terminal. Once terminal, stays terminal:
# #call only ever starts a step that is still :pending, so calling it again
# on a running or finished step is a no-op.
#
class Step
  STATES = %i[pending running completed failed cancelled].freeze

  attr_reader :id

  # @param id [String, nil] explicit identity; one is generated when nil
  # @param attributes [Hash] arbitrary keyword data kept in @attributes
  def initialize(id: nil, **attributes)
    @id = id || self.class.generate_id
    @attributes = attributes
    @state = :pending
    @result = nil
    @error = nil
    @task = nil
    @jobs = nil
    @mutex = Mutex.new
  end

  # Builds an id from the class name, pid, current thread object id and
  # four random bytes (hex encoded).
  def self.generate_id
    "#{name}-#{Process.pid}-#{Thread.current.object_id}-#{SecureRandom.hex(4)}"
  end

  # Called by the queue's worker. Subclasses override #perform instead.
  #
  # Runs #perform at most once per step. Returns nil; the outcome is read
  # through #state, #result and #error.
  #
  # @param task [Object] the task this step runs under; it is handed to
  #   #perform and is what #cancel cancels while the step is running
  # @raise [Async::Cancel] re-raised after the step is marked :cancelled
  def call(task)
    # Only a :pending step may start. Cancelled, running and finished
    # steps are left untouched.
    return unless transition_to_running(task)

    begin
      finish(:completed, result: perform(task))
    rescue Async::Cancel
      # Cascade to sub-queue before we lose the reference:
      @jobs&.cancel
      finish(:cancelled)
      raise
    rescue => error
      # Continue-on-failure: record the error, do NOT re-raise.
      finish(:failed, error: error)
    end
  end

  # Subclasses override this.
  #
  # Deliberately raises a RuntimeError rather than NotImplementedError:
  # NotImplementedError is not a StandardError, so it would bypass the
  # `rescue => error` in #call and the step would never be marked :failed.
  def perform(task)
    raise "#{self.class}#perform not implemented"
  end

  # Lazy accessor — creates the sub-queue parented to our running task.
  # Only valid while the step is running (inside #perform).
  #
  # The queue is memoized, so `type` only matters on the first call.
  #
  # @param type [Class] queue class; instantiated as type.new(parent: task).start
  # @raise [RuntimeError] when the step has no running task
  def jobs(type: Brute::Queue::SequentialQueue)
    @mutex.synchronize do
      raise "Step not running; sub-queue has nothing to parent to" unless @task
      @jobs ||= type.new(parent: @task).start
    end
  end

  def state
    @mutex.synchronize { @state }
  end

  def result
    @mutex.synchronize { @result }
  end

  def error
    @mutex.synchronize { @error }
  end

  # Consistent snapshot of id, state, result and error taken under the lock.
  def status
    @mutex.synchronize do
      { id: @id, state: @state, result: @result, error: @error }
    end
  end

  # Requests cancellation.
  #
  # A pending step is marked :cancelled immediately. A running step has its
  # task cancelled; the state itself only changes once #call rescues
  # Async::Cancel.
  #
  # @return [Boolean] false when the step had already finished, true otherwise
  def cancel
    task = @mutex.synchronize do
      case @state
      when :pending
        @state = :cancelled
        nil
      when :running
        @task
      else
        return false # already finished
      end
    end

    # Cancel outside the lock so the cancelled task can take it in #call.
    task&.cancel
    true
  end

  private

  # Moves pending -> running and remembers the task. Returns false (and
  # changes nothing) for any other current state.
  def transition_to_running(task)
    @mutex.synchronize do
      next false unless @state == :pending

      @state = :running
      @task = task
      true
    end
  end

  # Records a terminal state together with its outcome and drops the task
  # reference. Always returns nil so #call has a uniform return value.
  def finish(state, result: nil, error: nil)
    @mutex.synchronize do
      @state = state
      @result = result
      @error = error
      @task = nil
    end
    nil
  end
end
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
test do
  # Completes immediately with a fixed value.
  class HelloStep < Brute::Loop::Step
    def perform(task)
      "hello"
    end
  end

  # Always fails with a RuntimeError.
  class FailStep < Brute::Loop::Step
    def perform(task)
      raise "boom"
    end
  end

  # Long-running step. Not referenced by any example in this block.
  class SlowStep < Brute::Loop::Step
    def perform(task)
      sleep 10
      "done"
    end
  end

  # -- identity --

  it "generates a unique id" do
    first = HelloStep.new.id
    second = HelloStep.new.id
    first.should.be.kind_of String
    # Two fresh steps must not share an id.
    [first, second].uniq.size.should == 2
  end

  it "accepts a custom id" do
    HelloStep.new(id: "custom-1").id.should == "custom-1"
  end

  # -- initial state --

  it "starts in pending state" do
    HelloStep.new.state.should == :pending
  end

  it "starts with nil result" do
    HelloStep.new.result.should.be.nil
  end

  it "starts with nil error" do
    HelloStep.new.error.should.be.nil
  end

  # -- successful execution --

  it "transitions to completed on success" do
    Sync do
      step = HelloStep.new
      step.call(Async::Task.current)
      step.state.should == :completed
    end
  end

  it "captures the return value as result" do
    Sync do
      step = HelloStep.new
      step.call(Async::Task.current)
      step.result.should == "hello"
    end
  end

  # -- failed execution --

  it "transitions to failed on error" do
    Sync do
      step = FailStep.new
      step.call(Async::Task.current)
      step.state.should == :failed
    end
  end

  it "captures the exception as error" do
    Sync do
      step = FailStep.new
      step.call(Async::Task.current)
      step.error.message.should == "boom"
    end
  end

  it "does not re-raise on failure" do
    Sync do
      step = FailStep.new
      lambda { step.call(Async::Task.current) }.should.not.raise
    end
  end

  # -- cancellation of pending step --

  it "cancel returns true for pending step" do
    HelloStep.new.cancel.should.be.true
  end

  it "transitions pending step to cancelled" do
    step = HelloStep.new
    step.cancel
    step.state.should == :cancelled
  end

  it "skips perform when cancelled before call" do
    Sync do
      step = HelloStep.new
      step.cancel
      step.call(Async::Task.current)
      step.result.should.be.nil
    end
  end

  # -- cancellation of finished step --

  it "cancel returns false for completed step" do
    Sync do
      step = HelloStep.new
      step.call(Async::Task.current)
      step.cancel.should.be.false
    end
  end

  it "cancel returns false for failed step" do
    Sync do
      step = FailStep.new
      step.call(Async::Task.current)
      step.cancel.should.be.false
    end
  end

  # -- status --

  it "status includes id" do
    step = HelloStep.new(id: "s1")
    step.status[:id].should == "s1"
  end

  it "status includes state" do
    step = HelloStep.new
    step.status[:state].should == :pending
  end

  # -- perform not implemented --

  # The base #perform raises a RuntimeError, which #call records instead of
  # propagating, so the observable outcome is the :failed state.
  it "marks the base Step as failed when perform is not overridden" do
    Sync do
      step = Brute::Loop::Step.new
      step.call(Async::Task.current)
      step.state.should == :failed
    end
  end

  # -- jobs raises when not running --

  it "raises when accessing jobs outside perform" do
    lambda { HelloStep.new.jobs(type: Array) }.should.raise(RuntimeError)
  end

  # -- attributes stored --

  it "stores attributes" do
    step = HelloStep.new(url: "https://example.com")
    step.instance_variable_get(:@attributes)[:url].should == "https://example.com"
  end

  # -- nested sub-queue --

  describe "nesting" do
    # Enqueues three HelloSteps on its sub-queue, drains it, and returns the
    # children's results.
    class ParentStep < Brute::Loop::Step
      def perform(task)
        3.times { |i| jobs(type: Brute::Queue::SequentialQueue) << HelloStep.new(id: "child-#{i}") }
        jobs.drain
        jobs.steps.map(&:result)
      end
    end

    it "creates a sub-queue inside perform" do
      Sync do
        step = ParentStep.new
        step.call(Async::Task.current)
        step.result.should == ["hello", "hello", "hello"]
      end
    end

    it "sub-steps all complete" do
      Sync do
        step = ParentStep.new
        step.call(Async::Task.current)
        step.instance_variable_get(:@jobs).steps.all? { |s| s.state == :completed }.should.be.true
      end
    end
  end
end
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
6
|
+
module Brute
|
|
7
|
+
module Loop
|
|
8
|
+
# Step adapter around a single LLM::Function tool call.
#
# The step takes its id from the function's call id, which lets a tool
# result be matched back to the request the LLM issued.
#
class ToolCallStep < Step
  attr_reader :function

  # @param function [#id, #call] the tool call to execute
  # @param rest [Hash] remaining keywords, forwarded to Step#initialize
  def initialize(function:, **rest)
    call_id = function.id
    super(id: call_id, **rest)
    @function = function
  end

  # Invokes the wrapped function; its return value becomes the step result.
  def perform(task)
    function.call
  end
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
test do
  # Stand-in for a tool call whose #call hands back a canned value.
  FakeFunction = Struct.new(:id, :name, :arguments, :return_value) do
    def call
      return_value
    end
  end

  # Stand-in for a tool call that always blows up.
  class FailFunction
    attr_reader :id, :name, :arguments

    def initialize
      @id = "fail_1"
      @name = "fail"
      @arguments = {}
    end

    def call
      raise "tool exploded"
    end
  end

  # Wraps a function double in the step under test.
  step_for = ->(fn) { Brute::Loop::ToolCallStep.new(function: fn) }

  it "uses function id as step id" do
    step = step_for.call(FakeFunction.new("call_123", "read", {}, "content"))
    step.id.should == "call_123"
  end

  it "calls the function in perform" do
    Sync do
      step = step_for.call(FakeFunction.new("call_1", "read", {}, "file contents"))
      step.call(Async::Task.current)
      step.result.should == "file contents"
    end
  end

  it "transitions to completed on success" do
    Sync do
      step = step_for.call(FakeFunction.new("call_2", "write", {}, "ok"))
      step.call(Async::Task.current)
      step.state.should == :completed
    end
  end

  it "captures function as accessor" do
    fn = FakeFunction.new("call_3", "shell", {}, nil)
    step_for.call(fn).function.should.be.identical_to fn
  end

  it "transitions to failed when function raises" do
    Sync do
      step = step_for.call(FailFunction.new)
      step.call(Async::Task.current)
      step.state.should == :failed
    end
  end

  it "captures function error" do
    Sync do
      step = step_for.call(FailFunction.new)
      step.call(Async::Task.current)
      step.error.message.should == "tool exploded"
    end
  end
end
|
|
@@ -1,192 +1,106 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
require "brute"
|
|
6
|
-
end
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
7
5
|
|
|
8
6
|
module Brute
|
|
9
7
|
module Middleware
|
|
10
8
|
# Checks context size after each LLM call and triggers compaction
|
|
11
9
|
# when thresholds are exceeded.
|
|
12
10
|
#
|
|
13
|
-
# Runs POST-call: inspects message count and token usage
|
|
14
|
-
#
|
|
15
|
-
#
|
|
11
|
+
# Runs POST-call: inspects message count and token usage. If compaction
|
|
12
|
+
# is needed, summarizes older messages and replaces env[:messages] with
|
|
13
|
+
# the summary so the next LLM call starts with a compact history.
|
|
16
14
|
#
|
|
17
15
|
class CompactionCheck < Base
|
|
18
|
-
def initialize(app, compactor:, system_prompt
|
|
16
|
+
def initialize(app, compactor:, system_prompt:)
|
|
19
17
|
super(app)
|
|
20
18
|
@compactor = compactor
|
|
21
19
|
@system_prompt = system_prompt
|
|
22
|
-
@tools = tools
|
|
23
|
-
@stream = stream
|
|
24
20
|
end
|
|
25
21
|
|
|
26
22
|
def call(env)
|
|
27
23
|
response = @app.call(env)
|
|
28
24
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
usage = ctx.usage rescue nil
|
|
25
|
+
messages = env[:messages]
|
|
26
|
+
usage = env[:metadata].dig(:tokens, :last_call)
|
|
32
27
|
|
|
33
28
|
if @compactor.should_compact?(messages, usage: usage)
|
|
34
29
|
result = @compactor.compact(messages)
|
|
35
30
|
if result
|
|
36
31
|
summary_text, _recent = result
|
|
37
|
-
rebuild_context!(env, summary_text)
|
|
38
32
|
env[:metadata][:compaction] = {
|
|
39
33
|
messages_before: messages.size,
|
|
40
34
|
timestamp: Time.now.iso8601,
|
|
41
35
|
}
|
|
36
|
+
# Replace the message history with the summary
|
|
37
|
+
env[:messages] = [
|
|
38
|
+
LLM::Message.new(:system, @system_prompt),
|
|
39
|
+
LLM::Message.new(:user, "[Previous conversation summary]\n\n#{summary_text}"),
|
|
40
|
+
]
|
|
42
41
|
end
|
|
43
42
|
end
|
|
44
43
|
|
|
45
44
|
response
|
|
46
45
|
end
|
|
47
|
-
|
|
48
|
-
private
|
|
49
|
-
|
|
50
|
-
def rebuild_context!(env, summary_text)
|
|
51
|
-
provider = env[:provider]
|
|
52
|
-
ctx_opts = { tools: @tools }
|
|
53
|
-
ctx_opts[:stream] = @stream if @stream
|
|
54
|
-
new_ctx = LLM::Context.new(provider, **ctx_opts)
|
|
55
|
-
prompt = new_ctx.prompt do |p|
|
|
56
|
-
p.system @system_prompt
|
|
57
|
-
p.user "[Previous conversation summary]\n\n#{summary_text}"
|
|
58
|
-
end
|
|
59
|
-
new_ctx.talk(prompt)
|
|
60
|
-
env[:context] = new_ctx
|
|
61
|
-
end
|
|
62
46
|
end
|
|
63
47
|
end
|
|
64
48
|
end
|
|
65
49
|
|
|
66
|
-
|
|
67
|
-
require_relative "../../../spec/
|
|
68
|
-
|
|
69
|
-
RSpec.describe Brute::Middleware::CompactionCheck do
|
|
70
|
-
let(:response) { MockResponse.new(content: "compaction response") }
|
|
71
|
-
let(:inner_app) { ->(_env) { response } }
|
|
72
|
-
let(:compactor) { double("compactor") }
|
|
73
|
-
let(:system_prompt) { "You are a helpful assistant." }
|
|
74
|
-
let(:tools) { [] }
|
|
75
|
-
let(:middleware) do
|
|
76
|
-
described_class.new(inner_app, compactor: compactor, system_prompt: system_prompt, tools: tools)
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
it "passes the response through when compaction is not needed" do
|
|
80
|
-
allow(compactor).to receive(:should_compact?).and_return(false)
|
|
81
|
-
env = build_env
|
|
82
|
-
|
|
83
|
-
result = middleware.call(env)
|
|
84
|
-
|
|
85
|
-
expect(result).to eq(response)
|
|
86
|
-
expect(env[:metadata][:compaction]).to be_nil
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
it "does not replace context when compaction is not triggered" do
|
|
90
|
-
allow(compactor).to receive(:should_compact?).and_return(false)
|
|
91
|
-
env = build_env
|
|
92
|
-
original_ctx = env[:context]
|
|
93
|
-
|
|
94
|
-
middleware.call(env)
|
|
95
|
-
|
|
96
|
-
expect(env[:context]).to equal(original_ctx)
|
|
97
|
-
end
|
|
98
|
-
|
|
99
|
-
it "triggers compaction and rebuilds context when threshold is exceeded" do
|
|
100
|
-
allow(compactor).to receive(:should_compact?).and_return(true)
|
|
101
|
-
allow(compactor).to receive(:compact).and_return(["Summary of conversation", []])
|
|
102
|
-
|
|
103
|
-
provider = MockProvider.new
|
|
104
|
-
ctx = LLM::Context.new(provider, tools: [])
|
|
105
|
-
prompt = ctx.prompt { |p| p.system("sys"); p.user("hello") }
|
|
106
|
-
ctx.talk(prompt)
|
|
107
|
-
|
|
108
|
-
env = build_env(context: ctx, provider: provider)
|
|
109
|
-
middleware.call(env)
|
|
110
|
-
|
|
111
|
-
expect(env[:metadata][:compaction]).to include(:messages_before, :timestamp)
|
|
112
|
-
expect(env[:context]).not_to equal(ctx)
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
it "handles compactor returning nil gracefully" do
|
|
116
|
-
allow(compactor).to receive(:should_compact?).and_return(true)
|
|
117
|
-
allow(compactor).to receive(:compact).and_return(nil)
|
|
118
|
-
|
|
119
|
-
env = build_env
|
|
120
|
-
original_ctx = env[:context]
|
|
50
|
+
test do
|
|
51
|
+
require_relative "../../../spec/support/mock_provider"
|
|
52
|
+
require_relative "../../../spec/support/mock_response"
|
|
121
53
|
|
|
122
|
-
|
|
54
|
+
def build_env(**overrides)
|
|
55
|
+
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
56
|
+
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
57
|
+
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
58
|
+
end
|
|
123
59
|
|
|
124
|
-
|
|
125
|
-
|
|
60
|
+
def make_compactor(should: false, result: nil)
|
|
61
|
+
Object.new.tap do |c|
|
|
62
|
+
c.define_singleton_method(:should_compact?) { |_msgs, **_| should }
|
|
63
|
+
c.define_singleton_method(:compact) { |_msgs| result }
|
|
126
64
|
end
|
|
65
|
+
end
|
|
127
66
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
tools: tools,
|
|
136
|
-
stream: stream,
|
|
137
|
-
)
|
|
138
|
-
end
|
|
139
|
-
|
|
140
|
-
it "preserves the stream parameter on the rebuilt context" do
|
|
141
|
-
allow(compactor).to receive(:should_compact?).and_return(true)
|
|
142
|
-
allow(compactor).to receive(:compact).and_return(["Summary of conversation", []])
|
|
143
|
-
|
|
144
|
-
provider = MockProvider.new
|
|
145
|
-
original_ctx = LLM::Context.new(provider, tools: [], stream: stream)
|
|
146
|
-
prompt = original_ctx.prompt { |p| p.system("sys"); p.user("hello") }
|
|
147
|
-
original_ctx.talk(prompt)
|
|
148
|
-
|
|
149
|
-
env = build_env(context: original_ctx, provider: provider, streaming: true)
|
|
150
|
-
middleware_with_stream.call(env)
|
|
151
|
-
|
|
152
|
-
new_ctx = env[:context]
|
|
153
|
-
expect(new_ctx).not_to equal(original_ctx)
|
|
154
|
-
|
|
155
|
-
ctx_params = new_ctx.instance_variable_get(:@params)
|
|
156
|
-
expect(ctx_params[:stream]).to eq(stream),
|
|
157
|
-
"Expected rebuilt context to have stream: #{stream.inspect} " \
|
|
158
|
-
"in @params, but got: #{ctx_params[:stream].inspect}. " \
|
|
159
|
-
"This causes on_content callbacks to silently stop firing after compaction."
|
|
160
|
-
end
|
|
161
|
-
|
|
162
|
-
it "fires on_content callback on the rebuilt context when streaming" do
|
|
163
|
-
received_content = nil
|
|
164
|
-
callback = ->(text) { received_content = text }
|
|
165
|
-
|
|
166
|
-
allow(compactor).to receive(:should_compact?).and_return(true)
|
|
167
|
-
allow(compactor).to receive(:compact).and_return(["Summary", []])
|
|
67
|
+
it "passes the response through when compaction is not needed" do
|
|
68
|
+
response = MockResponse.new(content: "compaction response")
|
|
69
|
+
compactor = make_compactor(should: false)
|
|
70
|
+
middleware = Brute::Middleware::CompactionCheck.new(->(_env) { response }, compactor: compactor, system_prompt: "sys")
|
|
71
|
+
result = middleware.call(build_env)
|
|
72
|
+
result.should == response
|
|
73
|
+
end
|
|
168
74
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
75
|
+
it "does not set compaction metadata when not needed" do
|
|
76
|
+
compactor = make_compactor(should: false)
|
|
77
|
+
middleware = Brute::Middleware::CompactionCheck.new(->(_env) { MockResponse.new }, compactor: compactor, system_prompt: "sys")
|
|
78
|
+
env = build_env
|
|
79
|
+
middleware.call(env)
|
|
80
|
+
env[:metadata][:compaction].should.be.nil
|
|
81
|
+
end
|
|
173
82
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
83
|
+
it "replaces messages with summary when compaction triggers" do
|
|
84
|
+
compactor = make_compactor(should: true, result: ["Summary of conversation", []])
|
|
85
|
+
middleware = Brute::Middleware::CompactionCheck.new(->(_env) { MockResponse.new }, compactor: compactor, system_prompt: "sys")
|
|
86
|
+
env = build_env(messages: [LLM::Message.new(:user, "hello"), LLM::Message.new(:assistant, "hi"), LLM::Message.new(:user, "how")])
|
|
87
|
+
middleware.call(env)
|
|
88
|
+
env[:metadata][:compaction][:messages_before].should == 3
|
|
89
|
+
end
|
|
181
90
|
|
|
182
|
-
|
|
91
|
+
it "creates two messages after compaction" do
|
|
92
|
+
compactor = make_compactor(should: true, result: ["Summary", []])
|
|
93
|
+
middleware = Brute::Middleware::CompactionCheck.new(->(_env) { MockResponse.new }, compactor: compactor, system_prompt: "sys")
|
|
94
|
+
env = build_env(messages: [LLM::Message.new(:user, "hello")])
|
|
95
|
+
middleware.call(env)
|
|
96
|
+
env[:messages].size.should == 2
|
|
97
|
+
end
|
|
183
98
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
end
|
|
99
|
+
it "handles compactor returning nil gracefully" do
|
|
100
|
+
compactor = make_compactor(should: true, result: nil)
|
|
101
|
+
middleware = Brute::Middleware::CompactionCheck.new(->(_env) { MockResponse.new }, compactor: compactor, system_prompt: "sys")
|
|
102
|
+
env = build_env(messages: [LLM::Message.new(:user, "hello")])
|
|
103
|
+
middleware.call(env)
|
|
104
|
+
env[:metadata][:compaction].should.be.nil
|
|
191
105
|
end
|
|
192
106
|
end
|