brute 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/brute/agent_stream.rb +126 -2
- data/lib/brute/diff.rb +34 -0
- data/lib/brute/message_store.rb +194 -0
- data/lib/brute/middleware/compaction_check.rb +133 -0
- data/lib/brute/middleware/doom_loop_detection.rb +100 -0
- data/lib/brute/middleware/llm_call.rb +89 -0
- data/lib/brute/middleware/message_tracking.rb +177 -0
- data/lib/brute/middleware/otel/span.rb +111 -0
- data/lib/brute/middleware/otel/token_usage.rb +93 -0
- data/lib/brute/middleware/otel/tool_calls.rb +113 -0
- data/lib/brute/middleware/otel/tool_results.rb +92 -0
- data/lib/brute/middleware/otel.rb +5 -0
- data/lib/brute/middleware/reasoning_normalizer.rb +119 -0
- data/lib/brute/middleware/retry.rb +93 -0
- data/lib/brute/middleware/session_persistence.rb +42 -0
- data/lib/brute/middleware/token_tracking.rb +77 -0
- data/lib/brute/middleware/tool_error_tracking.rb +101 -0
- data/lib/brute/middleware/tool_use_guard.rb +70 -1
- data/lib/brute/middleware/tracing.rb +71 -0
- data/lib/brute/orchestrator.rb +169 -3
- data/lib/brute/patches/buffer_nil_guard.rb +5 -0
- data/lib/brute/pipeline.rb +135 -0
- data/lib/brute/prompts/build_switch.rb +33 -0
- data/lib/brute/prompts/environment.rb +47 -0
- data/lib/brute/prompts/identity.rb +36 -0
- data/lib/brute/prompts/instructions.rb +24 -0
- data/lib/brute/prompts/max_steps.rb +32 -0
- data/lib/brute/prompts/plan_reminder.rb +33 -0
- data/lib/brute/prompts/skills.rb +35 -0
- data/lib/brute/providers/opencode_go.rb +5 -0
- data/lib/brute/providers/opencode_zen.rb +7 -2
- data/lib/brute/providers/shell_response.rb +5 -0
- data/lib/brute/system_prompt.rb +214 -0
- data/lib/brute/tools/delegate.rb +129 -0
- data/lib/brute/tools/fs_patch.rb +53 -0
- data/lib/brute/tools/fs_read.rb +5 -0
- data/lib/brute/tools/fs_remove.rb +5 -0
- data/lib/brute/tools/fs_search.rb +5 -0
- data/lib/brute/tools/fs_undo.rb +5 -0
- data/lib/brute/tools/fs_write.rb +50 -0
- data/lib/brute/tools/net_fetch.rb +5 -0
- data/lib/brute/tools/question.rb +5 -0
- data/lib/brute/tools/shell.rb +5 -0
- data/lib/brute/tools/todo_read.rb +5 -0
- data/lib/brute/tools/todo_write.rb +5 -0
- data/lib/brute/version.rb +1 -1
- data/lib/brute.rb +8 -8
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 87cc119e0dc26d2499af1ceab1fe26c0ea2a6b9685c2acb9718a29d155959971
|
|
4
|
+
data.tar.gz: 7887f2d6d2a3680cf660c93bbd64999f45e28afcf1f86fbd4fe52605fc0fae28
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4b04baad572cd024f4e7cf0b6b42fb3b8794e773918e3d4ee21d751ec0a296e4047903fdf290c68dc54aa9faf0556363b216c3e8f1eb23ce11c6e646bd7f14ca
|
|
7
|
+
data.tar.gz: c719e091120b55f5f0f93149c0502dc39cfdb984ae141a71260d1f1a8116824a6718e63639a092726bc83fe199c65b2cf1f98412c82991b13e06d297b488f092
|
data/lib/brute/agent_stream.rb
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
if __FILE__ == $0
|
|
4
|
+
require "bundler/setup"
|
|
5
|
+
require "brute"
|
|
6
|
+
end
|
|
7
|
+
|
|
3
8
|
module Brute
|
|
4
9
|
# Bridges llm.rb's streaming callbacks to the host application.
|
|
5
10
|
#
|
|
@@ -48,10 +53,129 @@ module Brute
|
|
|
48
53
|
@pending_tools << [tool, error]
|
|
49
54
|
end
|
|
50
55
|
|
|
51
|
-
# Clear
|
|
52
|
-
|
|
56
|
+
# Clear only the tool call metadata (used by ToolUseGuard after it
|
|
57
|
+
# has consumed the data for synthetic message injection).
|
|
58
|
+
def clear_pending_tool_calls!
|
|
53
59
|
@pending_tool_calls.clear
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
# Clear the deferred execution queue after the orchestrator has
|
|
63
|
+
# consumed and dispatched all tool calls.
|
|
64
|
+
def clear_pending_tools!
|
|
54
65
|
@pending_tools.clear
|
|
55
66
|
end
|
|
56
67
|
end
|
|
57
68
|
end
|
|
69
|
+
|
|
70
|
+
if __FILE__ == $0
|
|
71
|
+
require_relative "../../spec/spec_helper"
|
|
72
|
+
|
|
73
|
+
RSpec.describe Brute::AgentStream do
|
|
74
|
+
# Build a mock tool that quacks like LLM::Function.
|
|
75
|
+
def mock_tool(id:, name:, arguments: {})
|
|
76
|
+
instance_double(LLM::Function,
|
|
77
|
+
id: id,
|
|
78
|
+
name: name,
|
|
79
|
+
arguments: arguments,
|
|
80
|
+
)
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
describe "#on_tool_call" do
|
|
84
|
+
it "records tool/error pair in pending_tools without spawning threads" do
|
|
85
|
+
stream = described_class.new
|
|
86
|
+
tool = mock_tool(id: "toolu_1", name: "read")
|
|
87
|
+
|
|
88
|
+
stream.on_tool_call(tool, nil)
|
|
89
|
+
|
|
90
|
+
expect(stream.pending_tools.size).to eq(1)
|
|
91
|
+
recorded_tool, recorded_error = stream.pending_tools.first
|
|
92
|
+
expect(recorded_tool).to eq(tool)
|
|
93
|
+
expect(recorded_error).to be_nil
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
it "records error tools in pending_tools" do
|
|
97
|
+
stream = described_class.new
|
|
98
|
+
tool = mock_tool(id: "toolu_err", name: "bad_tool")
|
|
99
|
+
error = LLM::Function::Return.new("toolu_err", "bad_tool", { error: true })
|
|
100
|
+
|
|
101
|
+
stream.on_tool_call(tool, error)
|
|
102
|
+
|
|
103
|
+
expect(stream.pending_tools.size).to eq(1)
|
|
104
|
+
_, recorded_error = stream.pending_tools.first
|
|
105
|
+
expect(recorded_error).to eq(error)
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
it "records pending tool call metadata for ToolUseGuard" do
|
|
109
|
+
stream = described_class.new
|
|
110
|
+
tool = mock_tool(
|
|
111
|
+
id: "toolu_abc",
|
|
112
|
+
name: "read",
|
|
113
|
+
arguments: { "file_path" => "test.rb" },
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
stream.on_tool_call(tool, nil)
|
|
117
|
+
|
|
118
|
+
calls = stream.pending_tool_calls
|
|
119
|
+
expect(calls).not_to be_empty
|
|
120
|
+
expect(calls.first).to include(
|
|
121
|
+
id: "toolu_abc",
|
|
122
|
+
name: "read",
|
|
123
|
+
arguments: { "file_path" => "test.rb" },
|
|
124
|
+
)
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
it "records metadata for multiple tool calls" do
|
|
128
|
+
stream = described_class.new
|
|
129
|
+
tool1 = mock_tool(id: "toolu_1", name: "read", arguments: { "file_path" => "a.rb" })
|
|
130
|
+
tool2 = mock_tool(id: "toolu_2", name: "write", arguments: { "file_path" => "b.rb" })
|
|
131
|
+
|
|
132
|
+
stream.on_tool_call(tool1, nil)
|
|
133
|
+
stream.on_tool_call(tool2, nil)
|
|
134
|
+
|
|
135
|
+
expect(stream.pending_tool_calls.size).to eq(2)
|
|
136
|
+
expect(stream.pending_tool_calls.map { |c| c[:id] }).to eq(["toolu_1", "toolu_2"])
|
|
137
|
+
|
|
138
|
+
expect(stream.pending_tools.size).to eq(2)
|
|
139
|
+
expect(stream.pending_tools.map { |t, _| t }).to eq([tool1, tool2])
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
describe "#clear_pending_tool_calls! and #clear_pending_tools!" do
|
|
144
|
+
it "empties both pending_tool_calls and pending_tools" do
|
|
145
|
+
stream = described_class.new
|
|
146
|
+
tool = mock_tool(id: "toolu_1", name: "read")
|
|
147
|
+
|
|
148
|
+
stream.on_tool_call(tool, nil)
|
|
149
|
+
expect(stream.pending_tool_calls).not_to be_empty
|
|
150
|
+
expect(stream.pending_tools).not_to be_empty
|
|
151
|
+
|
|
152
|
+
stream.clear_pending_tool_calls!
|
|
153
|
+
stream.clear_pending_tools!
|
|
154
|
+
expect(stream.pending_tool_calls).to be_empty
|
|
155
|
+
expect(stream.pending_tools).to be_empty
|
|
156
|
+
end
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
describe "#on_content" do
|
|
160
|
+
it "fires the content callback" do
|
|
161
|
+
received = nil
|
|
162
|
+
stream = described_class.new(on_content: ->(text) { received = text })
|
|
163
|
+
|
|
164
|
+
stream.on_content("hello")
|
|
165
|
+
|
|
166
|
+
expect(received).to eq("hello")
|
|
167
|
+
end
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
describe "#on_reasoning_content" do
|
|
171
|
+
it "fires the reasoning callback" do
|
|
172
|
+
received = nil
|
|
173
|
+
stream = described_class.new(on_reasoning: ->(text) { received = text })
|
|
174
|
+
|
|
175
|
+
stream.on_reasoning_content("thinking...")
|
|
176
|
+
|
|
177
|
+
expect(received).to eq("thinking...")
|
|
178
|
+
end
|
|
179
|
+
end
|
|
180
|
+
end
|
|
181
|
+
end
|
data/lib/brute/diff.rb
CHANGED
|
@@ -24,3 +24,37 @@ module Brute
|
|
|
24
24
|
end
|
|
25
25
|
end
|
|
26
26
|
end
|
|
27
|
+
|
|
28
|
+
if __FILE__ == $0
|
|
29
|
+
require_relative "../../spec/spec_helper"
|
|
30
|
+
|
|
31
|
+
RSpec.describe Brute::Diff do
|
|
32
|
+
describe ".unified" do
|
|
33
|
+
it "generates a unified diff for changed content" do
|
|
34
|
+
old = "line1\nold\nline3\n"
|
|
35
|
+
new_text = "line1\nnew\nline3\n"
|
|
36
|
+
diff = described_class.unified(old, new_text)
|
|
37
|
+
expect(diff).to include("-old")
|
|
38
|
+
expect(diff).to include("+new")
|
|
39
|
+
expect(diff).to include("@@")
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
it "returns empty string for identical content" do
|
|
43
|
+
text = "same\ncontent\n"
|
|
44
|
+
expect(described_class.unified(text, text)).to eq("")
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
it "handles empty old content (new file)" do
|
|
48
|
+
diff = described_class.unified("", "new\ncontent\n")
|
|
49
|
+
expect(diff).to include("+new")
|
|
50
|
+
expect(diff).to include("+content")
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
it "handles empty new content (deleted file)" do
|
|
54
|
+
diff = described_class.unified("old\ncontent\n", "")
|
|
55
|
+
expect(diff).to include("-old")
|
|
56
|
+
expect(diff).to include("-content")
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
data/lib/brute/message_store.rb
CHANGED
|
@@ -267,3 +267,197 @@ module Brute
|
|
|
267
267
|
end
|
|
268
268
|
end
|
|
269
269
|
end
|
|
270
|
+
|
|
271
|
+
if __FILE__ == $0
|
|
272
|
+
require_relative "../../spec/spec_helper"
|
|
273
|
+
|
|
274
|
+
require "tmpdir"
|
|
275
|
+
|
|
276
|
+
RSpec.describe Brute::MessageStore do
|
|
277
|
+
let(:tmpdir) { Dir.mktmpdir("brute_test_") }
|
|
278
|
+
let(:session_id) { "test-session-123" }
|
|
279
|
+
let(:store) { described_class.new(session_id: session_id, dir: tmpdir) }
|
|
280
|
+
|
|
281
|
+
after { FileUtils.rm_rf(tmpdir) }
|
|
282
|
+
|
|
283
|
+
describe "#append_user" do
|
|
284
|
+
it "creates a user message with text part" do
|
|
285
|
+
id = store.append_user(text: "Hello")
|
|
286
|
+
|
|
287
|
+
msg = store.message(id)
|
|
288
|
+
expect(msg[:info][:role]).to eq("user")
|
|
289
|
+
expect(msg[:info][:sessionID]).to eq(session_id)
|
|
290
|
+
expect(msg[:parts].size).to eq(1)
|
|
291
|
+
expect(msg[:parts][0][:type]).to eq("text")
|
|
292
|
+
expect(msg[:parts][0][:text]).to eq("Hello")
|
|
293
|
+
end
|
|
294
|
+
|
|
295
|
+
it "generates sequential message IDs" do
|
|
296
|
+
id1 = store.append_user(text: "First")
|
|
297
|
+
id2 = store.append_user(text: "Second")
|
|
298
|
+
|
|
299
|
+
expect(id1).to eq("msg_0001")
|
|
300
|
+
expect(id2).to eq("msg_0002")
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
it "persists to disk as JSON" do
|
|
304
|
+
id = store.append_user(text: "Persisted")
|
|
305
|
+
|
|
306
|
+
path = File.join(tmpdir, "#{id}.json")
|
|
307
|
+
expect(File.exist?(path)).to be true
|
|
308
|
+
|
|
309
|
+
data = JSON.parse(File.read(path), symbolize_names: true)
|
|
310
|
+
expect(data[:info][:role]).to eq("user")
|
|
311
|
+
expect(data[:parts][0][:text]).to eq("Persisted")
|
|
312
|
+
end
|
|
313
|
+
end
|
|
314
|
+
|
|
315
|
+
describe "#append_assistant" do
|
|
316
|
+
it "creates an assistant message" do
|
|
317
|
+
user_id = store.append_user(text: "Hi")
|
|
318
|
+
asst_id = store.append_assistant(parent_id: user_id, model_id: "claude", provider_id: "anthropic")
|
|
319
|
+
|
|
320
|
+
msg = store.message(asst_id)
|
|
321
|
+
expect(msg[:info][:role]).to eq("assistant")
|
|
322
|
+
expect(msg[:info][:parentID]).to eq(user_id)
|
|
323
|
+
expect(msg[:info][:modelID]).to eq("claude")
|
|
324
|
+
expect(msg[:info][:providerID]).to eq("anthropic")
|
|
325
|
+
expect(msg[:info][:tokens]).to include(input: 0, output: 0)
|
|
326
|
+
expect(msg[:parts]).to be_empty
|
|
327
|
+
end
|
|
328
|
+
end
|
|
329
|
+
|
|
330
|
+
describe "#add_text_part" do
|
|
331
|
+
it "appends a text part to an existing message" do
|
|
332
|
+
asst_id = store.append_assistant
|
|
333
|
+
|
|
334
|
+
store.add_text_part(message_id: asst_id, text: "Here is my response")
|
|
335
|
+
|
|
336
|
+
msg = store.message(asst_id)
|
|
337
|
+
expect(msg[:parts].size).to eq(1)
|
|
338
|
+
expect(msg[:parts][0][:type]).to eq("text")
|
|
339
|
+
expect(msg[:parts][0][:text]).to eq("Here is my response")
|
|
340
|
+
end
|
|
341
|
+
end
|
|
342
|
+
|
|
343
|
+
describe "#add_tool_part / #complete_tool_part / #error_tool_part" do
|
|
344
|
+
it "tracks tool lifecycle: running → completed" do
|
|
345
|
+
asst_id = store.append_assistant
|
|
346
|
+
|
|
347
|
+
store.add_tool_part(
|
|
348
|
+
message_id: asst_id,
|
|
349
|
+
tool: "read",
|
|
350
|
+
call_id: "call_001",
|
|
351
|
+
input: { file_path: "/tmp/test.rb" },
|
|
352
|
+
)
|
|
353
|
+
|
|
354
|
+
msg = store.message(asst_id)
|
|
355
|
+
tool_part = msg[:parts].find { |p| p[:type] == "tool" }
|
|
356
|
+
expect(tool_part[:tool]).to eq("read")
|
|
357
|
+
expect(tool_part[:state][:status]).to eq("running")
|
|
358
|
+
|
|
359
|
+
store.complete_tool_part(
|
|
360
|
+
message_id: asst_id,
|
|
361
|
+
call_id: "call_001",
|
|
362
|
+
output: "file contents here",
|
|
363
|
+
)
|
|
364
|
+
|
|
365
|
+
msg = store.message(asst_id)
|
|
366
|
+
tool_part = msg[:parts].find { |p| p[:type] == "tool" }
|
|
367
|
+
expect(tool_part[:state][:status]).to eq("completed")
|
|
368
|
+
expect(tool_part[:state][:output]).to eq("file contents here")
|
|
369
|
+
expect(tool_part[:state][:time][:end]).to be_a(Integer)
|
|
370
|
+
end
|
|
371
|
+
|
|
372
|
+
it "tracks tool lifecycle: running → error" do
|
|
373
|
+
asst_id = store.append_assistant
|
|
374
|
+
|
|
375
|
+
store.add_tool_part(
|
|
376
|
+
message_id: asst_id,
|
|
377
|
+
tool: "shell",
|
|
378
|
+
call_id: "call_002",
|
|
379
|
+
input: { command: "rm -rf /" },
|
|
380
|
+
)
|
|
381
|
+
|
|
382
|
+
store.error_tool_part(
|
|
383
|
+
message_id: asst_id,
|
|
384
|
+
call_id: "call_002",
|
|
385
|
+
error: "permission denied",
|
|
386
|
+
)
|
|
387
|
+
|
|
388
|
+
msg = store.message(asst_id)
|
|
389
|
+
tool_part = msg[:parts].find { |p| p[:type] == "tool" }
|
|
390
|
+
expect(tool_part[:state][:status]).to eq("error")
|
|
391
|
+
expect(tool_part[:state][:error]).to eq("permission denied")
|
|
392
|
+
end
|
|
393
|
+
end
|
|
394
|
+
|
|
395
|
+
describe "#complete_assistant" do
|
|
396
|
+
it "sets completion time and token counts" do
|
|
397
|
+
asst_id = store.append_assistant
|
|
398
|
+
|
|
399
|
+
store.complete_assistant(
|
|
400
|
+
message_id: asst_id,
|
|
401
|
+
tokens: { input: 100, output: 50, reasoning: 10, cache: { read: 20, write: 5 } },
|
|
402
|
+
)
|
|
403
|
+
|
|
404
|
+
msg = store.message(asst_id)
|
|
405
|
+
expect(msg[:info][:time][:completed]).to be_a(Integer)
|
|
406
|
+
expect(msg[:info][:tokens][:input]).to eq(100)
|
|
407
|
+
expect(msg[:info][:tokens][:output]).to eq(50)
|
|
408
|
+
expect(msg[:info][:tokens][:reasoning]).to eq(10)
|
|
409
|
+
end
|
|
410
|
+
end
|
|
411
|
+
|
|
412
|
+
describe "#messages" do
|
|
413
|
+
it "returns all messages in order" do
|
|
414
|
+
store.append_user(text: "Q1")
|
|
415
|
+
store.append_assistant
|
|
416
|
+
store.append_user(text: "Q2")
|
|
417
|
+
|
|
418
|
+
msgs = store.messages
|
|
419
|
+
expect(msgs.size).to eq(3)
|
|
420
|
+
expect(msgs[0][:info][:role]).to eq("user")
|
|
421
|
+
expect(msgs[1][:info][:role]).to eq("assistant")
|
|
422
|
+
expect(msgs[2][:info][:role]).to eq("user")
|
|
423
|
+
end
|
|
424
|
+
end
|
|
425
|
+
|
|
426
|
+
describe "#count" do
|
|
427
|
+
it "returns the number of stored messages" do
|
|
428
|
+
expect(store.count).to eq(0)
|
|
429
|
+
|
|
430
|
+
store.append_user(text: "Q1")
|
|
431
|
+
expect(store.count).to eq(1)
|
|
432
|
+
|
|
433
|
+
store.append_assistant
|
|
434
|
+
expect(store.count).to eq(2)
|
|
435
|
+
end
|
|
436
|
+
end
|
|
437
|
+
|
|
438
|
+
describe "loading from disk" do
|
|
439
|
+
it "restores messages from existing files" do
|
|
440
|
+
store.append_user(text: "Persisted Q")
|
|
441
|
+
asst_id = store.append_assistant(model_id: "claude")
|
|
442
|
+
store.add_text_part(message_id: asst_id, text: "Persisted A")
|
|
443
|
+
|
|
444
|
+
# Create a new store from the same directory
|
|
445
|
+
store2 = described_class.new(session_id: session_id, dir: tmpdir)
|
|
446
|
+
|
|
447
|
+
expect(store2.count).to eq(2)
|
|
448
|
+
expect(store2.messages[0][:parts][0][:text]).to eq("Persisted Q")
|
|
449
|
+
expect(store2.messages[1][:parts][0][:text]).to eq("Persisted A")
|
|
450
|
+
end
|
|
451
|
+
|
|
452
|
+
it "continues sequence numbering from loaded messages" do
|
|
453
|
+
store.append_user(text: "Q1")
|
|
454
|
+
store.append_user(text: "Q2")
|
|
455
|
+
|
|
456
|
+
store2 = described_class.new(session_id: session_id, dir: tmpdir)
|
|
457
|
+
id = store2.append_user(text: "Q3")
|
|
458
|
+
|
|
459
|
+
expect(id).to eq("msg_0003")
|
|
460
|
+
end
|
|
461
|
+
end
|
|
462
|
+
end
|
|
463
|
+
end
|
|
data/lib/brute/middleware/compaction_check.rb
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
if __FILE__ == $0
|
|
4
|
+
require "bundler/setup"
|
|
5
|
+
require "brute"
|
|
6
|
+
end
|
|
7
|
+
|
|
3
8
|
module Brute
|
|
4
9
|
module Middleware
|
|
5
10
|
# Checks context size after each LLM call and triggers compaction
|
|
@@ -57,3 +62,131 @@ module Brute
|
|
|
57
62
|
end
|
|
58
63
|
end
|
|
59
64
|
end
|
|
65
|
+
|
|
66
|
+
if __FILE__ == $0
|
|
67
|
+
require_relative "../../../spec/spec_helper"
|
|
68
|
+
|
|
69
|
+
RSpec.describe Brute::Middleware::CompactionCheck do
|
|
70
|
+
let(:response) { MockResponse.new(content: "compaction response") }
|
|
71
|
+
let(:inner_app) { ->(_env) { response } }
|
|
72
|
+
let(:compactor) { double("compactor") }
|
|
73
|
+
let(:system_prompt) { "You are a helpful assistant." }
|
|
74
|
+
let(:tools) { [] }
|
|
75
|
+
let(:middleware) do
|
|
76
|
+
described_class.new(inner_app, compactor: compactor, system_prompt: system_prompt, tools: tools)
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
it "passes the response through when compaction is not needed" do
|
|
80
|
+
allow(compactor).to receive(:should_compact?).and_return(false)
|
|
81
|
+
env = build_env
|
|
82
|
+
|
|
83
|
+
result = middleware.call(env)
|
|
84
|
+
|
|
85
|
+
expect(result).to eq(response)
|
|
86
|
+
expect(env[:metadata][:compaction]).to be_nil
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
it "does not replace context when compaction is not triggered" do
|
|
90
|
+
allow(compactor).to receive(:should_compact?).and_return(false)
|
|
91
|
+
env = build_env
|
|
92
|
+
original_ctx = env[:context]
|
|
93
|
+
|
|
94
|
+
middleware.call(env)
|
|
95
|
+
|
|
96
|
+
expect(env[:context]).to equal(original_ctx)
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
it "triggers compaction and rebuilds context when threshold is exceeded" do
|
|
100
|
+
allow(compactor).to receive(:should_compact?).and_return(true)
|
|
101
|
+
allow(compactor).to receive(:compact).and_return(["Summary of conversation", []])
|
|
102
|
+
|
|
103
|
+
provider = MockProvider.new
|
|
104
|
+
ctx = LLM::Context.new(provider, tools: [])
|
|
105
|
+
prompt = ctx.prompt { |p| p.system("sys"); p.user("hello") }
|
|
106
|
+
ctx.talk(prompt)
|
|
107
|
+
|
|
108
|
+
env = build_env(context: ctx, provider: provider)
|
|
109
|
+
middleware.call(env)
|
|
110
|
+
|
|
111
|
+
expect(env[:metadata][:compaction]).to include(:messages_before, :timestamp)
|
|
112
|
+
expect(env[:context]).not_to equal(ctx)
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
it "handles compactor returning nil gracefully" do
|
|
116
|
+
allow(compactor).to receive(:should_compact?).and_return(true)
|
|
117
|
+
allow(compactor).to receive(:compact).and_return(nil)
|
|
118
|
+
|
|
119
|
+
env = build_env
|
|
120
|
+
original_ctx = env[:context]
|
|
121
|
+
|
|
122
|
+
middleware.call(env)
|
|
123
|
+
|
|
124
|
+
expect(env[:context]).to equal(original_ctx)
|
|
125
|
+
expect(env[:metadata][:compaction]).to be_nil
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
context "when streaming is enabled" do
|
|
129
|
+
let(:stream) { double("AgentStream") }
|
|
130
|
+
|
|
131
|
+
let(:middleware_with_stream) do
|
|
132
|
+
described_class.new(inner_app,
|
|
133
|
+
compactor: compactor,
|
|
134
|
+
system_prompt: system_prompt,
|
|
135
|
+
tools: tools,
|
|
136
|
+
stream: stream,
|
|
137
|
+
)
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
it "preserves the stream parameter on the rebuilt context" do
|
|
141
|
+
allow(compactor).to receive(:should_compact?).and_return(true)
|
|
142
|
+
allow(compactor).to receive(:compact).and_return(["Summary of conversation", []])
|
|
143
|
+
|
|
144
|
+
provider = MockProvider.new
|
|
145
|
+
original_ctx = LLM::Context.new(provider, tools: [], stream: stream)
|
|
146
|
+
prompt = original_ctx.prompt { |p| p.system("sys"); p.user("hello") }
|
|
147
|
+
original_ctx.talk(prompt)
|
|
148
|
+
|
|
149
|
+
env = build_env(context: original_ctx, provider: provider, streaming: true)
|
|
150
|
+
middleware_with_stream.call(env)
|
|
151
|
+
|
|
152
|
+
new_ctx = env[:context]
|
|
153
|
+
expect(new_ctx).not_to equal(original_ctx)
|
|
154
|
+
|
|
155
|
+
ctx_params = new_ctx.instance_variable_get(:@params)
|
|
156
|
+
expect(ctx_params[:stream]).to eq(stream),
|
|
157
|
+
"Expected rebuilt context to have stream: #{stream.inspect} " \
|
|
158
|
+
"in @params, but got: #{ctx_params[:stream].inspect}. " \
|
|
159
|
+
"This causes on_content callbacks to silently stop firing after compaction."
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
it "fires on_content callback on the rebuilt context when streaming" do
|
|
163
|
+
received_content = nil
|
|
164
|
+
callback = ->(text) { received_content = text }
|
|
165
|
+
|
|
166
|
+
allow(compactor).to receive(:should_compact?).and_return(true)
|
|
167
|
+
allow(compactor).to receive(:compact).and_return(["Summary", []])
|
|
168
|
+
|
|
169
|
+
provider = MockProvider.new
|
|
170
|
+
original_ctx = LLM::Context.new(provider, tools: [], stream: stream)
|
|
171
|
+
prompt = original_ctx.prompt { |p| p.system("sys"); p.user("hello") }
|
|
172
|
+
original_ctx.talk(prompt)
|
|
173
|
+
|
|
174
|
+
env = build_env(
|
|
175
|
+
context: original_ctx,
|
|
176
|
+
provider: provider,
|
|
177
|
+
streaming: true,
|
|
178
|
+
callbacks: { on_content: callback },
|
|
179
|
+
)
|
|
180
|
+
middleware_with_stream.call(env)
|
|
181
|
+
|
|
182
|
+
new_ctx = env[:context]
|
|
183
|
+
|
|
184
|
+
ctx_params = new_ctx.instance_variable_get(:@params)
|
|
185
|
+
expect(ctx_params).to have_key(:stream),
|
|
186
|
+
"Rebuilt context is missing :stream in @params. " \
|
|
187
|
+
"LLMCall will skip the on_content fallback because env[:streaming] is true, " \
|
|
188
|
+
"so content from the next LLM call will be silently dropped."
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
end
|
|
192
|
+
end
|
|
data/lib/brute/middleware/doom_loop_detection.rb
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
if __FILE__ == $0
|
|
4
|
+
require "bundler/setup"
|
|
5
|
+
require "brute"
|
|
6
|
+
end
|
|
7
|
+
|
|
3
8
|
module Brute
|
|
4
9
|
module Middleware
|
|
5
10
|
# Detects when the agent is stuck repeating tool call patterns and injects
|
|
@@ -31,3 +36,98 @@ module Brute
|
|
|
31
36
|
end
|
|
32
37
|
end
|
|
33
38
|
end
|
|
39
|
+
|
|
40
|
+
if __FILE__ == $0
|
|
41
|
+
require_relative "../../../spec/spec_helper"
|
|
42
|
+
|
|
43
|
+
RSpec.describe Brute::Middleware::DoomLoopDetection do
|
|
44
|
+
let(:response) { MockResponse.new(content: "loop check") }
|
|
45
|
+
let(:inner_app) { ->(_env) { response } }
|
|
46
|
+
|
|
47
|
+
# Build a fake assistant message whose .functions returns the given list.
|
|
48
|
+
def assistant_msg_with_functions(function_list)
|
|
49
|
+
msg = LLM::Message.new(:assistant, "tool msg", {})
|
|
50
|
+
allow(msg).to receive(:functions).and_return(function_list)
|
|
51
|
+
msg
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def fake_function(name:, arguments:)
|
|
55
|
+
double("fn", name: name, arguments: arguments)
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
it "passes through when no doom loop is detected" do
|
|
59
|
+
middleware = described_class.new(inner_app, threshold: 3)
|
|
60
|
+
env = build_env
|
|
61
|
+
|
|
62
|
+
result = middleware.call(env)
|
|
63
|
+
|
|
64
|
+
expect(result).to eq(response)
|
|
65
|
+
expect(env[:metadata][:doom_loop_detected]).to be_nil
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
it "detects consecutive identical tool calls" do
|
|
69
|
+
provider = MockProvider.new
|
|
70
|
+
ctx = LLM::Context.new(provider, tools: [])
|
|
71
|
+
|
|
72
|
+
fn = fake_function(name: "fs_read", arguments: '{"path":"x.rb"}')
|
|
73
|
+
messages = 4.times.map { assistant_msg_with_functions([fn]) }
|
|
74
|
+
|
|
75
|
+
allow(ctx).to receive(:messages).and_return(double("buffer", to_a: messages))
|
|
76
|
+
allow(ctx).to receive(:talk)
|
|
77
|
+
|
|
78
|
+
middleware = described_class.new(inner_app, threshold: 3)
|
|
79
|
+
env = build_env(context: ctx, provider: provider)
|
|
80
|
+
|
|
81
|
+
middleware.call(env)
|
|
82
|
+
|
|
83
|
+
expect(env[:metadata][:doom_loop_detected]).not_to be_nil
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
it "detects repeating sequences [A,B,A,B,A,B]" do
|
|
87
|
+
provider = MockProvider.new
|
|
88
|
+
ctx = LLM::Context.new(provider, tools: [])
|
|
89
|
+
|
|
90
|
+
fn_a = fake_function(name: "fs_read", arguments: '{"path":"a.rb"}')
|
|
91
|
+
fn_b = fake_function(name: "shell", arguments: '{"cmd":"ls"}')
|
|
92
|
+
messages = 3.times.flat_map do
|
|
93
|
+
[assistant_msg_with_functions([fn_a]), assistant_msg_with_functions([fn_b])]
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
allow(ctx).to receive(:messages).and_return(double("buffer", to_a: messages))
|
|
97
|
+
allow(ctx).to receive(:talk)
|
|
98
|
+
|
|
99
|
+
middleware = described_class.new(inner_app, threshold: 3)
|
|
100
|
+
env = build_env(context: ctx, provider: provider)
|
|
101
|
+
|
|
102
|
+
middleware.call(env)
|
|
103
|
+
|
|
104
|
+
expect(env[:metadata][:doom_loop_detected]).not_to be_nil
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
it "does not trigger below the threshold" do
|
|
108
|
+
provider = MockProvider.new
|
|
109
|
+
ctx = LLM::Context.new(provider, tools: [])
|
|
110
|
+
|
|
111
|
+
fn = fake_function(name: "fs_read", arguments: '{"path":"x.rb"}')
|
|
112
|
+
messages = 2.times.map { assistant_msg_with_functions([fn]) }
|
|
113
|
+
|
|
114
|
+
allow(ctx).to receive(:messages).and_return(double("buffer", to_a: messages))
|
|
115
|
+
|
|
116
|
+
middleware = described_class.new(inner_app, threshold: 3)
|
|
117
|
+
env = build_env(context: ctx, provider: provider)
|
|
118
|
+
|
|
119
|
+
middleware.call(env)
|
|
120
|
+
|
|
121
|
+
expect(env[:metadata][:doom_loop_detected]).to be_nil
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
describe Brute::DoomLoopDetector do
|
|
125
|
+
it "generates a warning message with repetition count" do
|
|
126
|
+
detector = described_class.new(threshold: 3)
|
|
127
|
+
msg = detector.warning_message(5)
|
|
128
|
+
expect(msg).to include("Doom loop detected")
|
|
129
|
+
expect(msg).to include("5 times")
|
|
130
|
+
end
|
|
131
|
+
end
|
|
132
|
+
end
|
|
133
|
+
end
|