brute 1.0.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/brute/agent.rb +72 -6
- data/lib/brute/events/handler.rb +69 -0
- data/lib/brute/events/prefixed_terminal_output.rb +72 -0
- data/lib/brute/events/terminal_output_handler.rb +68 -0
- data/lib/brute/middleware/001_otel_span.rb +77 -0
- data/lib/brute/middleware/003_tool_result_loop.rb +103 -0
- data/lib/brute/middleware/004_summarize.rb +139 -0
- data/lib/brute/middleware/005_tracing.rb +86 -0
- data/lib/brute/middleware/010_max_iterations.rb +73 -0
- data/lib/brute/middleware/015_otel_token_usage.rb +42 -0
- data/lib/brute/middleware/020_system_prompt.rb +128 -0
- data/lib/brute/middleware/040_compaction_check.rb +155 -0
- data/lib/brute/middleware/060_questions.rb +41 -0
- data/lib/brute/middleware/070_tool_call.rb +247 -0
- data/lib/brute/middleware/073_otel_tool_call.rb +49 -0
- data/lib/brute/middleware/075_otel_tool_results.rb +46 -0
- data/lib/brute/middleware/100_llm_call.rb +62 -0
- data/lib/brute/middleware/event_handler.rb +25 -0
- data/lib/brute/middleware/user_queue.rb +35 -0
- data/lib/brute/pipeline.rb +44 -107
- data/lib/brute/prompts/skills.rb +2 -2
- data/lib/brute/prompts.rb +23 -23
- data/lib/brute/providers/shell.rb +6 -19
- data/lib/brute/providers/shell_response.rb +22 -30
- data/lib/brute/session.rb +52 -0
- data/lib/brute/store/snapshot_store.rb +21 -37
- data/lib/brute/sub_agent.rb +106 -0
- data/lib/brute/system_prompt.rb +1 -83
- data/lib/brute/tool.rb +107 -0
- data/lib/brute/tools/delegate.rb +61 -70
- data/lib/brute/tools/fs_patch.rb +9 -7
- data/lib/brute/tools/fs_read.rb +233 -20
- data/lib/brute/tools/fs_remove.rb +8 -9
- data/lib/brute/tools/fs_search.rb +98 -16
- data/lib/brute/tools/fs_undo.rb +8 -8
- data/lib/brute/tools/fs_write.rb +7 -5
- data/lib/brute/tools/net_fetch.rb +8 -8
- data/lib/brute/tools/question.rb +36 -24
- data/lib/brute/tools/shell.rb +74 -16
- data/lib/brute/tools/todo_read.rb +8 -8
- data/lib/brute/tools/todo_write.rb +25 -18
- data/lib/brute/tools.rb +8 -12
- data/lib/brute/truncation.rb +219 -0
- data/lib/brute/version.rb +1 -1
- data/lib/brute.rb +82 -45
- metadata +59 -46
- data/lib/brute/loop/agent_stream.rb +0 -118
- data/lib/brute/loop/agent_turn.rb +0 -520
- data/lib/brute/loop/compactor.rb +0 -107
- data/lib/brute/loop/doom_loop.rb +0 -86
- data/lib/brute/loop/step.rb +0 -332
- data/lib/brute/loop/tool_call_step.rb +0 -90
- data/lib/brute/middleware/base.rb +0 -27
- data/lib/brute/middleware/compaction_check.rb +0 -106
- data/lib/brute/middleware/doom_loop_detection.rb +0 -136
- data/lib/brute/middleware/llm_call.rb +0 -128
- data/lib/brute/middleware/message_tracking.rb +0 -339
- data/lib/brute/middleware/otel/span.rb +0 -105
- data/lib/brute/middleware/otel/token_usage.rb +0 -68
- data/lib/brute/middleware/otel/tool_calls.rb +0 -68
- data/lib/brute/middleware/otel/tool_results.rb +0 -65
- data/lib/brute/middleware/otel.rb +0 -34
- data/lib/brute/middleware/reasoning_normalizer.rb +0 -192
- data/lib/brute/middleware/retry.rb +0 -157
- data/lib/brute/middleware/session_persistence.rb +0 -72
- data/lib/brute/middleware/token_tracking.rb +0 -124
- data/lib/brute/middleware/tool_error_tracking.rb +0 -179
- data/lib/brute/middleware/tool_use_guard.rb +0 -133
- data/lib/brute/middleware/tracing.rb +0 -124
- data/lib/brute/middleware.rb +0 -18
- data/lib/brute/orchestrator/turn.rb +0 -105
- data/lib/brute/patches/anthropic_tool_role.rb +0 -35
- data/lib/brute/patches/buffer_nil_guard.rb +0 -26
- data/lib/brute/providers/models_dev.rb +0 -111
- data/lib/brute/providers/ollama.rb +0 -135
- data/lib/brute/providers/opencode_go.rb +0 -43
- data/lib/brute/providers/opencode_zen.rb +0 -87
- data/lib/brute/providers.rb +0 -62
- data/lib/brute/queue/base_queue.rb +0 -222
- data/lib/brute/queue/parallel_queue.rb +0 -66
- data/lib/brute/queue/sequential_queue.rb +0 -63
- data/lib/brute/store/message_store.rb +0 -362
- data/lib/brute/store/session.rb +0 -106
- /data/lib/brute/{diff.rb → utils/diff.rb} +0 -0
|
@@ -1,520 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "bundler/setup"
|
|
4
|
-
require "brute"
|
|
5
|
-
|
|
6
|
-
module Brute
  module Loop
    # Factory and namespace for provider-specific agent turns.
    #
    # An agent turn sends a message to the LLM, executes the tool calls the
    # model asks for, feeds the results back, and repeats until no tool calls
    # are left. The last pipeline response is the value of the turn.
    #
    # Usage:
    #
    #   step = AgentTurn.perform(agent:, session:, pipeline:, input:)
    #
    # AgentTurn.perform picks the turn class from the agent's provider, runs
    # it inside a Sync block and returns the step (which exposes .state,
    # .result, .error, etc.).
    #
    # Provider-specific subclasses live under AgentTurn:: and may override
    # supported_messages to filter the message history per provider.
    module AgentTurn
      # Build the right AgentTurn step for this agent's provider.
      # Does NOT execute it — call step.call(task) yourself, or enqueue it.
      def self.new(agent:, session:, pipeline:, input: nil, callbacks: {}, **rest)
        detect(agent.provider).new(
          agent: agent,
          session: session,
          pipeline: pipeline,
          input: input,
          callbacks: callbacks,
          **rest
        )
      end

      # Build a step, execute it inside a Sync block, and return the step.
      def self.perform(agent:, session:, pipeline:, input: nil, callbacks: {}, **rest)
        step = new(agent: agent, session: session, pipeline: pipeline, input: input, callbacks: callbacks, **rest)
        Sync { step.call(Async::Task.current) }
        step
      end

      # Pick the turn class by substring match on the provider's
      # (down-cased) class name. Falls back to Base for a missing provider
      # or an unrecognized class name.
      def self.detect(provider)
        return Base unless provider

        class_name = provider.class.name.to_s.downcase
        if class_name.include?("anthropic")
          Anthropic
        elsif class_name.include?("openai")
          OpenAI
        elsif class_name.include?("google") || class_name.include?("gemini")
          Google
        else
          Base
        end
      end

      # The default implementation. Works for any provider.
      # Provider-specific subclasses override supported_messages
      # and anything else that differs.
      #
      # Supports two modes:
      #
      # Non-streaming (default): tool calls are read from
      # env[:pending_functions] after each pipeline call.
      #
      # Streaming: enabled when on_content or on_reasoning callbacks are
      # present. An AgentStream is created and handed to the pipeline via
      # env[:stream]; tool calls are collected afterwards from the stream's
      # pending_tools.
      #
      # Callbacks:
      #
      #   on_content:         ->(text) {}             # text chunk (streaming) or full text (non-streaming)
      #   on_reasoning:       ->(text) {}             # reasoning/thinking chunk (streaming only)
      #   on_tool_call_start: ->(batch) {}            # [{name:, arguments:}, ...] before tool execution
      #   on_tool_result:     ->(name, r) {}          # per-tool, after each completes
      #   on_question:        ->(questions, queue) {} # interactive; push answers onto queue
      #
      class Base < Step
        # Upper bound on tool-execution round trips within one turn.
        MAX_ITERATIONS = 100

        attr_reader :agent, :session

        def initialize(agent:, session:, pipeline:, input: nil, callbacks: {}, **rest)
          super(**rest)
          @agent = agent
          @session = session
          @pipeline = pipeline
          @input = input
          @callbacks = callbacks

          # A streaming bridge is only needed when someone listens for
          # content or reasoning chunks; it is exposed to the pipeline
          # through env[:stream].
          return unless @callbacks[:on_content] || @callbacks[:on_reasoning]

          @stream = AgentStream.new(
            on_content: @callbacks[:on_content],
            on_reasoning: @callbacks[:on_reasoning],
            on_question: @callbacks[:on_question],
          )
        end

        # Run the turn: one initial pipeline call, then one further call per
        # batch of tool results until the pipeline sets env[:should_exit],
        # no tool calls are pending, or MAX_ITERATIONS is reached.
        #
        # Returns the response of the last pipeline call.
        def perform(task)
          env = build_env

          # First LLM call
          env[:input] = build_initial_input(@input)
          env[:tool_results] = nil
          response = @pipeline.call(env)

          iterations = 0
          # The iteration cap is checked BEFORE pending tools are collected:
          # collecting clears the queue, so doing it first would silently
          # drop the calls when the cap is hit.
          while !env[:should_exit] && iterations < MAX_ITERATIONS
            pending = collect_pending_tools(env)
            break unless pending.any?

            # Announce the full batch before anything runs
            @callbacks[:on_tool_call_start]&.call(
              pending.map { |fn, _| { name: fn.name, arguments: fn.arguments } }
            )

            # Question tools run sequentially on this fiber; everything else
            # goes through the parallel sub-queue. Questions come first.
            questions, others = pending.partition { |fn, _| fn.name == "question" }
            results = run_question_tools(questions) + run_other_tools(others)

            # Feed results back to LLM
            env[:input] = results
            env[:tool_results] = results.map do |r|
              name = r.respond_to?(:name) ? r.name : "unknown"
              [name, result_value(r)]
            end
            response = @pipeline.call(env)

            # Drop the sub-queue so the next iteration gets a fresh one
            @mutex.synchronize { @jobs = nil }
            iterations += 1
          end

          response
        end

        # Override in subclasses to filter message types per provider.
        # Default: all messages pass through.
        def supported_messages(messages)
          messages
        end

        private

        # Run question tools one after another on the current fiber.
        # Pairs that already carry an error are reported without being run.
        def run_question_tools(questions)
          questions.map do |fn, err|
            if err
              @callbacks[:on_tool_result]&.call(err.name, result_value(err))
              err
            else
              # Thread.current[] storage is fiber-local in Ruby.
              # NOTE(review): the value stays set after the call — confirm
              # nothing relies on that before scoping it with ensure.
              Thread.current[:on_question] = @callbacks[:on_question]
              result = fn.call
              @callbacks[:on_tool_result]&.call(fn.name, result_value(result))
              result
            end
          end
        end

        # Report pre-existing errors, then run the remaining tools as
        # ToolCallSteps through the ParallelQueue and collect their outcomes
        # in submission order.
        def run_other_tools(others)
          return [] unless others.any?

          errors, executable = others.partition { |_, err| err }

          # Record pre-existing errors (from stream's on_tool_call)
          results = errors.map do |_, err|
            @callbacks[:on_tool_result]&.call(err.name, result_value(err))
            err
          end
          return results unless executable.any?

          tool_steps = executable.map { |fn, _| ToolCallStep.new(function: fn) }
          tool_steps.each { |s| jobs(type: Brute::Queue::ParallelQueue) << s }
          jobs.drain

          tool_steps.each do |s|
            val = s.state == :completed ? s.result : s.error
            @callbacks[:on_tool_result]&.call(s.function.name, result_value(val))
            results << val
          end
          results
        end

        # Fresh env hash handed to every pipeline call of this turn.
        def build_env
          {
            provider: @agent.provider,
            model: @agent.model,
            input: nil,
            tools: @agent.tools,
            messages: [],
            stream: @stream,
            params: {},
            metadata: {},
            tool_results: nil,
            streaming: !!@stream,
            callbacks: @callbacks,
            should_exit: nil,
            pending_functions: [],
          }
        end

        # Prompt for the first call: the agent's system prompt (if any)
        # followed by the user message (if any).
        def build_initial_input(user_message)
          sys = @agent.system_prompt
          LLM::Prompt.new(@agent.provider) do |p|
            p.system(sys) if sys
            p.user(user_message) if user_message
          end
        end

        # Collect pending tool calls from the stream (streaming mode) or
        # from env[:pending_functions].
        #
        # Returns [(function, error_or_nil), ...] pairs and clears the
        # source it read from.
        def collect_pending_tools(env)
          if @stream&.pending_tools&.any?
            @stream.pending_tools.dup.tap { @stream.clear_pending_tools! }
          elsif env[:pending_functions]&.any?
            env[:pending_functions].dup.tap { env[:pending_functions] = [] }.map { |fn| [fn, nil] }
          else
            []
          end
        end

        # Unwrap objects that expose #value; pass anything else through.
        def result_value(result)
          result.respond_to?(:value) ? result.value : result
        end
      end

      # Provider-specific subclasses. Override supported_messages
      # or loop behavior as needed.

      class Anthropic < Base
      end

      class OpenAI < Base
      end

      class Google < Base
      end
    end
  end
end
|
|
258
|
-
|
|
259
|
-
test do
  require_relative "../../../spec/support/mock_provider"
  require_relative "../../../spec/support/mock_response"

  # Pipeline double: records every env[:input] it receives and replays the
  # canned responses in order, repeating the last one once they run out.
  class RecordingPipeline
    attr_reader :calls

    def initialize(responses: [])
      @responses = responses
      @calls = []
      @index = 0
    end

    def call(env)
      @calls << env[:input]
      current = @responses[@index] || @responses.last
      @index += 1
      current
    end
  end

  FakeResponse = Struct.new(:content)

  def make_agent(provider: MockProvider.new, tools: [])
    Brute::Agent.new(provider: provider, model: nil, tools: tools)
  end

  # MockProvider whose #class reports the given class name.
  def provider_named(class_name)
    fake_class = Class.new
    fake_class.define_singleton_method(:name) { class_name }
    MockProvider.new.tap do |provider|
      provider.define_singleton_method(:class) { fake_class }
    end
  end

  # Un-executed turn with the standard session and "hi" input.
  def build_turn(pipeline:, agent: make_agent)
    Brute::Loop::AgentTurn.new(
      agent: agent,
      session: Brute::Store::Session.new,
      pipeline: pipeline,
      input: "hi",
    )
  end

  # Build a turn and execute it on the current Async task.
  def run_turn(pipeline:, agent: make_agent)
    build_turn(pipeline: pipeline, agent: agent).tap do |step|
      step.call(Async::Task.current)
    end
  end

  # -- factory detection --

  it "detects Base for unknown providers" do
    Brute::Loop::AgentTurn.detect(MockProvider.new).should == Brute::Loop::AgentTurn::Base
  end

  it "detects Anthropic from provider class name" do
    Brute::Loop::AgentTurn.detect(provider_named("LLM::Anthropic")).should == Brute::Loop::AgentTurn::Anthropic
  end

  it "detects OpenAI from provider class name" do
    Brute::Loop::AgentTurn.detect(provider_named("LLM::OpenAI")).should == Brute::Loop::AgentTurn::OpenAI
  end

  it "detects Google from provider class name" do
    Brute::Loop::AgentTurn.detect(provider_named("LLM::Google")).should == Brute::Loop::AgentTurn::Google
  end

  # -- AgentTurn.new returns the right subclass --

  it "returns Base instance for unknown provider" do
    step = build_turn(pipeline: RecordingPipeline.new(responses: []))
    step.should.be.kind_of Brute::Loop::AgentTurn::Base
  end

  # -- basic turn execution --

  it "calls the pipeline" do
    Sync do
      pipeline = RecordingPipeline.new(responses: [FakeResponse.new("hello")])
      run_turn(pipeline: pipeline)
      pipeline.calls.size.should == 1
    end
  end

  it "returns the LLM response as result" do
    Sync do
      pipeline = RecordingPipeline.new(responses: [FakeResponse.new("world")])
      step = run_turn(pipeline: pipeline)
      step.result.content.should == "world"
    end
  end

  it "transitions to completed" do
    Sync do
      pipeline = RecordingPipeline.new(responses: [FakeResponse.new("ok")])
      step = run_turn(pipeline: pipeline)
      step.state.should == :completed
    end
  end

  # -- AgentTurn.perform convenience --

  it "perform returns a completed step" do
    pipeline = RecordingPipeline.new(responses: [FakeResponse.new("done")])
    step = Brute::Loop::AgentTurn.perform(
      agent: make_agent,
      session: Brute::Store::Session.new,
      pipeline: pipeline,
      input: "hi",
    )
    step.state.should == :completed
  end

  # -- cancellation --

  it "is cancellable when pending" do
    step = build_turn(
      pipeline: RecordingPipeline.new(responses: []),
      agent: Brute::Agent.new(provider: nil, model: nil, tools: []),
    )
    step.cancel
    step.state.should == :cancelled
  end

  # -- system prompt from agent --

  it "uses agent system_prompt" do
    Sync do
      agent = Brute::Agent.new(
        provider: MockProvider.new,
        model: nil,
        tools: [],
        system_prompt: "You are a test bot",
      )
      pipeline = RecordingPipeline.new(responses: [FakeResponse.new("ok")])
      step = run_turn(pipeline: pipeline, agent: agent)
      step.state.should == :completed
    end
  end

  # -- should_exit loop break --

  # Stand-in tool function: calling it returns itself, and it exposes a
  # fixed #value, so it doubles as its own result.
  LoopTestFunction = Struct.new(:id, :name, :arguments, keyword_init: true) do
    def call; self; end
    def value; "tool_result"; end
  end

  # Pipeline double that always leaves one pending function in env and,
  # from the configured call number onward, also sets env[:should_exit].
  class ShouldExitPipeline
    attr_reader :call_count

    def initialize(exit_on_call: nil)
      @exit_on_call = exit_on_call
      @call_count = 0
      @fn = LoopTestFunction.new(id: "call_1", name: "test_tool", arguments: "{}")
    end

    def call(env)
      @call_count += 1

      # A pending function is always present, so only should_exit can
      # stop the loop.
      env[:pending_functions] = [@fn]

      if @exit_on_call && @call_count >= @exit_on_call
        env[:should_exit] = {
          reason: "test_exit",
          message: "forced exit for test",
          source: "ShouldExitPipeline",
        }
      end

      FakeResponse.new("response #{@call_count}")
    end
  end

  it "breaks the loop when should_exit is set on the initial call" do
    Sync do
      pipeline = ShouldExitPipeline.new(exit_on_call: 1)
      step = run_turn(pipeline: pipeline)

      # Only the initial call happened: should_exit was already set when
      # the loop guard was first evaluated.
      pipeline.call_count.should == 1
      step.state.should == :completed
    end
  end

  it "breaks the loop mid-iteration when should_exit is set" do
    Sync do
      # exit_on_call: 2 — the first call yields tools (loop enters), the
      # second call (inside the loop) sets should_exit.
      pipeline = ShouldExitPipeline.new(exit_on_call: 2)
      step = run_turn(pipeline: pipeline)

      # Initial call + one loop iteration; no third call.
      pipeline.call_count.should == 2
      step.state.should == :completed
    end
  end

  it "loops normally when should_exit is not set" do
    Sync do
      call_count = 0
      fn = LoopTestFunction.new(id: "call_1", name: "test_tool", arguments: "{}")

      pipeline_obj = Object.new
      pipeline_obj.define_singleton_method(:call_count) { call_count }
      pipeline_obj.define_singleton_method(:call) do |env|
        call_count += 1
        env[:pending_functions] = call_count <= 3 ? [fn] : []
        FakeResponse.new("response #{call_count}")
      end

      step = run_turn(pipeline: pipeline_obj)

      # Call 1 (initial) leaves fn pending, so the loop is entered.
      # Iterations 1 and 2 run the tool and call the pipeline (calls 2, 3),
      # each leaving fn pending again.
      # Iteration 3 runs the tool, call 4 leaves nothing pending, loop ends.
      call_count.should == 4
      step.state.should == :completed
    end
  end
end
|
data/lib/brute/loop/compactor.rb
DELETED
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Brute
  module Loop
    # Context compaction service. When the conversation grows past configurable
    # thresholds, older messages are summarized into a condensed form and the
    # original messages are dropped, keeping the context window manageable.
    #
    # Modeled after forgecode's Compactor which uses an eviction window and
    # retention window strategy.
    class Compactor
      # Model used for summarization when no :summary_model is configured.
      DEFAULT_SUMMARY_MODEL = "claude-sonnet-4-20250514"

      DEFAULTS = {
        token_threshold: 100_000, # Compact when usage.total_tokens exceeds this
        message_threshold: 200,   # Compact when message count exceeds this
        retention_window: 6,      # Number of most recent messages to always keep
        summary_model: nil,       # Model for summarization (DEFAULT_SUMMARY_MODEL if nil)
      }.freeze

      attr_reader :config

      # provider - object responding to complete(prompt, model:) whose
      #            result responds to #content.
      # opts     - overrides for any key in DEFAULTS.
      def initialize(provider, **opts)
        @provider = provider
        @config = DEFAULTS.merge(opts)
      end

      # Check whether compaction should run based on current context state.
      #
      # True when the message count exceeds :message_threshold, or when a
      # usage object is given and its total_tokens exceeds :token_threshold.
      def should_compact?(messages, usage: nil)
        return true if messages.size > @config[:message_threshold]
        return true if usage && (usage.total_tokens || 0) > @config[:token_threshold]

        false
      end

      # Compact the message history by summarizing older messages.
      #
      # Returns [summary_text, kept_messages] — the caller rebuilds the
      # context from these — or nil when every message falls inside the
      # retention window (nothing to summarize).
      def compact(messages)
        total = messages.size
        # Clamp so a negative window keeps nothing instead of producing an
        # out-of-range slice (which would yield nil for the kept messages).
        keep_count = @config[:retention_window].clamp(0, total)
        return nil if total <= keep_count

        split_at = total - keep_count
        old_messages = messages[0...split_at]
        recent_messages = messages[split_at..]

        [summarize(old_messages), recent_messages]
      end

      private

      # Ask the provider for a structured summary of the given messages and
      # return the response content.
      def summarize(messages)
        conversation_text = messages.map { |m| describe_message(m) }.join("\n---\n")

        prompt = <<~PROMPT
          Summarize this conversation history for context continuity. The summary will replace
          these messages in the context window, so include everything the agent needs to continue
          working effectively.

          Structure your summary as:
          ## Goal
          What the user asked for.

          ## Progress
          - Files read, created, or modified (list paths)
          - Commands executed and their outcomes
          - Key decisions made

          ## Current State
          Where things stand right now — what's done and what remains.

          ## Next Steps
          What should happen next based on the conversation.

          ---
          CONVERSATION:
          #{conversation_text}
        PROMPT

        model = @config[:summary_model] || DEFAULT_SUMMARY_MODEL
        @provider.complete(prompt, model: model).content
      end

      # One condensed line per message: "role: [tools: ...] content", with
      # the content cut by the inclusive slice [0..1000].
      def describe_message(message)
        role = message.respond_to?(:role) ? message.role.to_s : "unknown"
        body = message.respond_to?(:content) ? message.content : message

        "#{role}:#{tool_summary(message)} #{body.to_s[0..1000]}"
      end

      # Tool call info for messages that carry function calls; "" otherwise.
      def tool_summary(message)
        return "" unless message.respond_to?(:functions) && message.functions&.any?

        calls = message.functions.map { |f| "#{f.name}(#{f.arguments.to_s[0..200]})" }
        " [tools: #{calls.join(", ")}]"
      end
    end
  end
end
|
data/lib/brute/loop/doom_loop.rb
DELETED
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Brute
  module Loop
    # Detects when the agent is stuck in a repeating pattern of tool calls.
    #
    # Two types of loops are detected:
    #   1. Consecutive identical calls: [A, A, A] — same tool + same args
    #   2. Repeating sequences: [A,B,C, A,B,C, A,B,C] — a pattern cycling
    #
    # The detector only reports the repetition and builds the warning text;
    # delivering that warning to the LLM is left to the caller.
    class DoomLoopDetector
      DEFAULT_THRESHOLD = 3

      attr_reader :threshold

      # threshold - how many times a pattern must repeat at the tail of the
      #             tool-call history to count as a loop.
      def initialize(threshold: DEFAULT_THRESHOLD)
        @threshold = threshold
      end

      # Extracts tool call signatures from the given messages and checks for
      # repeating patterns at the tail.
      #
      # Returns the repetition count if a loop is found, nil otherwise.
      def detect(messages)
        signatures = extract_signatures(messages)
        return nil if signatures.size < @threshold

        check_repeating_pattern(signatures)
      end

      # Build a human-readable warning message for the agent.
      def warning_message(repetitions)
        <<~MSG
          SYSTEM NOTICE: Doom loop detected — the same tool call pattern has repeated #{repetitions} times.
          You are stuck in a loop and not making progress. Stop and try a fundamentally different approach:
          - Re-read the file to check your changes actually applied
          - Try a different tool or strategy
          - Break the problem into smaller steps
          - If a command keeps failing, investigate why before retrying
        MSG
      end

      private

      # Extract [tool_name, arguments] string pairs from assistant messages,
      # in order. Messages that do not expose #functions / #assistant?, or
      # whose #functions is nil, contribute nothing.
      def extract_signatures(messages)
        messages.flat_map do |message|
          next [] unless assistant_with_functions?(message)

          (message.functions || []).map { |fn| [fn.name.to_s, fn.arguments.to_s] }
        end
      end

      # True for objects that expose both predicates and report themselves
      # as assistant messages.
      def assistant_with_functions?(message)
        message.respond_to?(:functions) &&
          message.respond_to?(:assistant?) &&
          message.assistant?
      end

      # Check for repeating patterns of any length at the tail of the sequence.
      # Returns the repetition count, or nil.
      def check_repeating_pattern(sequence)
        # A non-positive threshold cannot describe a repetition (and zero
        # would divide by zero below).
        return nil unless @threshold.positive?

        # A pattern repeated @threshold times must fit into the sequence.
        max_pattern_len = sequence.size / @threshold

        (1..max_pattern_len).each do |pattern_len|
          count = count_tail_repetitions(sequence, pattern_len)
          return count if count >= @threshold
        end

        nil
      end

      # Count how many times a pattern of `length` repeats at the end of the sequence.
      def count_tail_repetitions(sequence, length)
        return 0 if sequence.size < length

        pattern = sequence.last(length)
        count = 1
        pos = sequence.size - length

        # Walk backwards one pattern-width at a time while the preceding
        # window still equals the tail pattern.
        while pos >= length
          candidate = sequence[(pos - length)...pos]
          break unless candidate == pattern

          count += 1
          pos -= length
        end

        count
      end
    end
  end
end
|