brute 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/brute/agent_stream.rb +49 -0
- data/lib/brute/compactor.rb +105 -0
- data/lib/brute/doom_loop.rb +84 -0
- data/lib/brute/file_mutation_queue.rb +99 -0
- data/lib/brute/hooks.rb +84 -0
- data/lib/brute/middleware/base.rb +27 -0
- data/lib/brute/middleware/compaction_check.rb +56 -0
- data/lib/brute/middleware/doom_loop_detection.rb +33 -0
- data/lib/brute/middleware/llm_call.rb +28 -0
- data/lib/brute/middleware/reasoning_normalizer.rb +98 -0
- data/lib/brute/middleware/retry.rb +45 -0
- data/lib/brute/middleware/session_persistence.rb +29 -0
- data/lib/brute/middleware/token_tracking.rb +46 -0
- data/lib/brute/middleware/tool_error_tracking.rb +46 -0
- data/lib/brute/middleware/tracing.rb +34 -0
- data/lib/brute/orchestrator.rb +297 -0
- data/lib/brute/patches/anthropic_tool_role.rb +35 -0
- data/lib/brute/patches/buffer_nil_guard.rb +21 -0
- data/lib/brute/pipeline.rb +81 -0
- data/lib/brute/session.rb +86 -0
- data/lib/brute/snapshot_store.rb +49 -0
- data/lib/brute/system_prompt.rb +88 -0
- data/lib/brute/todo_store.rb +27 -0
- data/lib/brute/tools/delegate.rb +35 -0
- data/lib/brute/tools/fs_patch.rb +37 -0
- data/lib/brute/tools/fs_read.rb +37 -0
- data/lib/brute/tools/fs_remove.rb +31 -0
- data/lib/brute/tools/fs_search.rb +38 -0
- data/lib/brute/tools/fs_undo.rb +29 -0
- data/lib/brute/tools/fs_write.rb +26 -0
- data/lib/brute/tools/net_fetch.rb +37 -0
- data/lib/brute/tools/shell.rb +38 -0
- data/lib/brute/tools/todo_read.rb +15 -0
- data/lib/brute/tools/todo_write.rb +32 -0
- data/lib/brute.rb +121 -0
- metadata +101 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Brute
  module Middleware
    # Retries the wrapped call when the LLM raises a transient error.
    #
    # Only LLM::RateLimitError and LLM::ServerError are considered
    # retryable; any other exception propagates to the caller untouched.
    # Between attempts the middleware sleeps for an exponentially growing
    # delay (base_delay ** attempt_number) and records the attempt number
    # and delay in env[:metadata].
    #
    # Because this middleware wraps the LLM call directly, the retry is
    # invisible to the orchestrator — it only ever sees the final outcome.
    #
    class Retry < Base
      DEFAULT_MAX_ATTEMPTS = 3
      DEFAULT_BASE_DELAY = 2 # seconds

      # @param app [#call] the inner middleware/app being wrapped
      # @param max_attempts [Integer] total attempts before giving up
      # @param base_delay [Numeric] base of the exponential backoff, in seconds
      def initialize(app, max_attempts: DEFAULT_MAX_ATTEMPTS, base_delay: DEFAULT_BASE_DELAY)
        super(app)
        @max_attempts = max_attempts
        @base_delay = base_delay
      end

      # Invoke the inner app, retrying on transient LLM failures.
      # On final failure the error message is stored in
      # env[:metadata][:last_error] and the exception is re-raised.
      def call(env)
        failures = 0
        begin
          @app.call(env)
        rescue LLM::RateLimitError, LLM::ServerError => error
          failures += 1
          unless failures < @max_attempts
            env[:metadata][:last_error] = error.message
            raise
          end

          backoff = @base_delay**failures
          env[:metadata][:retry_attempt] = failures
          env[:metadata][:retry_delay] = backoff

          sleep(backoff)
          retry
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Brute
  module Middleware
    # Persists the conversation to disk after every LLM call.
    #
    # Post-call middleware: once the inner app returns, the current
    # context is handed to Session#save. Persistence errors are logged
    # and swallowed — a broken save must never take down the agent loop.
    #
    class SessionPersistence < Base
      # @param app [#call] the inner middleware/app being wrapped
      # @param session [Brute::Session] session used to persist the context
      def initialize(app, session:)
        super(app)
        @session = session
      end

      # Run the inner app, then best-effort persist env[:context].
      # Always returns the inner app's response.
      def call(env)
        result = @app.call(env)
        persist(env[:context])
        result
      end

      private

      # Save the context, downgrading any failure to a warning.
      def persist(context)
        @session.save(context)
      rescue => e
        warn "[brute] Session save failed: #{e.message}"
      end
    end
  end
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Brute
  module Middleware
    # Accumulates token usage over every LLM call made through this instance.
    #
    # Post-call middleware: after the inner app returns, the response's
    # usage object (when present) is folded into running totals, and a
    # summary — including the most recent call's usage — is written to
    # env[:metadata][:tokens].
    #
    class TokenTracking < Base
      def initialize(app)
        super(app)
        @total_input = 0
        @total_output = 0
        @total_reasoning = 0
        @call_count = 0
      end

      # Run the inner app; if the response exposes usage, record it.
      # Always returns the inner app's response unchanged.
      def call(env)
        response = @app.call(env)

        usage = response.respond_to?(:usage) ? response.usage : nil
        record(env, usage) if usage

        response
      end

      private

      # Fold one call's usage into the cumulative counters and publish
      # the summary hash into env[:metadata][:tokens].
      # NOTE: :total intentionally mirrors the original — input + output
      # only, reasoning tracked separately.
      def record(env, usage)
        input = usage.input_tokens.to_i
        output = usage.output_tokens.to_i

        @total_input += input
        @total_output += output
        @total_reasoning += usage.reasoning_tokens.to_i
        @call_count += 1

        env[:metadata][:tokens] = {
          total_input: @total_input,
          total_output: @total_output,
          total_reasoning: @total_reasoning,
          total: @total_input + @total_output,
          call_count: @call_count,
          last_call: {
            input: input,
            output: output,
            total: usage.total_tokens.to_i,
          },
        }
      end
    end
  end
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Brute
  module Middleware
    # Counts failures per tool across LLM calls and flags when any tool
    # has failed too many times.
    #
    # This middleware never executes tools itself. Pre-call, it inspects
    # the tool results being sent to the LLM (env[:tool_results]) and
    # bumps a per-tool failure counter for every Hash result carrying
    # an :error key.
    #
    # Once a tool's count reaches max_failures,
    # env[:metadata][:tool_error_limit_reached] is set so the
    # orchestrator can choose to bail out.
    #
    class ToolErrorTracking < Base
      DEFAULT_MAX_FAILURES = 3

      # @param app [#call] the inner middleware/app being wrapped
      # @param max_failures [Integer] per-tool failure ceiling
      def initialize(app, max_failures: DEFAULT_MAX_FAILURES)
        super(app)
        @max_failures = max_failures
        @errors = Hash.new(0) # tool_name → count
      end

      # Tally failures from the outbound tool results, publish counters
      # into env[:metadata], then delegate to the inner app.
      def call(env)
        tally_failures(env[:tool_results])

        env[:metadata][:tool_errors] = @errors.dup
        env[:metadata][:tool_error_limit_reached] =
          @errors.values.any? { |count| count >= @max_failures }

        @app.call(env)
      end

      # Reset error counts (e.g., between user turns).
      def reset!
        @errors.clear
      end

      private

      # Bump the counter for every [name, result] pair whose result is
      # a Hash with a truthy :error value.
      def tally_failures(results)
        return unless results

        results.each do |name, result|
          @errors[name] += 1 if result.is_a?(Hash) && result[:error]
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Brute
  module Middleware
    # Logs timing and token usage around every LLM call.
    #
    # Pre-call: logs the call number and how many messages are in the
    # context. Post-call: logs elapsed wall-clock time (monotonic) and
    # the response's total token count when available.
    #
    class Tracing < Base
      # @param app [#call] the inner middleware/app being wrapped
      # @param logger [Logger] destination for debug/info lines
      def initialize(app, logger:)
        super(app)
        @logger = logger
        @call_count = 0
      end

      # Time the inner call and log before/after. Returns the response.
      def call(env)
        @call_count += 1
        context_messages = env[:context].messages.to_a
        @logger.debug("[brute] LLM call ##{@call_count} (#{context_messages.size} messages in context)")

        started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        response = @app.call(env)
        elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

        token_count = response.respond_to?(:usage) ? response.usage&.total_tokens : "?"
        @logger.info("[brute] LLM response ##{@call_count}: #{token_count} tokens, #{elapsed.round(2)}s")

        response
      end
    end
  end
end
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "async"
require "async/barrier"

module Brute
  # The core agent loop. Drives the cycle of:
  #
  #   prompt → LLM → tool calls → execute → send results → repeat
  #
  # All cross-cutting concerns (retry, compaction, doom loop detection,
  # token tracking, session persistence, tracing, reasoning) are implemented
  # as Rack-style middleware in the Pipeline. The orchestrator is now a
  # thin loop that:
  #
  #   1. Sends input through the pipeline (which wraps the LLM call)
  #   2. Executes any tool calls the LLM requested
  #   3. Repeats until done or a limit is hit
  #
  class Orchestrator
    MAX_REQUESTS_PER_TURN = 100

    attr_reader :context, :session, :pipeline, :env, :barrier

    # @param provider [LLM::Provider] the LLM provider to talk to
    # @param tools [Array<Class>] tool classes exposed to the model
    # @param cwd [String] working directory for tools and custom rules
    # @param session [Brute::Session, nil] session for persistence (created if nil)
    # @param compactor_opts [Hash] options forwarded to Compactor
    # @param reasoning [Hash] options for the ReasoningNormalizer middleware
    # @param on_content,on_reasoning,on_tool_call,on_tool_result [Proc, nil]
    #   streaming/observer callbacks; providing content/reasoning callbacks
    #   enables streaming mode
    # @param logger [Logger, nil] defaults to INFO on stderr
    def initialize(
      provider:,
      tools: Brute::TOOLS,
      cwd: Dir.pwd,
      session: nil,
      compactor_opts: {},
      reasoning: {},
      on_content: nil,
      on_reasoning: nil,
      on_tool_call: nil,
      on_tool_result: nil,
      logger: nil
    )
      @provider = provider
      @tool_classes = tools
      @cwd = cwd
      @session = session || Session.new
      @logger = logger || Logger.new($stderr, level: Logger::INFO)

      # Build system prompt
      custom_rules = load_custom_rules
      prompt_builder = SystemPrompt.new(cwd: @cwd, tools: @tool_classes, custom_rules: custom_rules)
      @system_prompt = prompt_builder.build

      # Initialize the LLM context (with streaming when callbacks provided)
      @stream = if on_content || on_reasoning
        AgentStream.new(
          on_content: on_content,
          on_reasoning: on_reasoning,
          on_tool_call: on_tool_call,
          on_tool_result: on_tool_result,
        )
      end
      @context = LLM::Context.new(@provider, tools: @tool_classes,
                                  **(@stream ? {stream: @stream} : {}))

      # Build the middleware pipeline
      compactor = Compactor.new(provider, **compactor_opts)
      @pipeline = build_pipeline(
        compactor: compactor,
        session: @session,
        logger: @logger,
        reasoning: reasoning,
      )

      # The shared env hash — passed to every pipeline.call()
      @env = {
        context: @context,
        provider: @provider,
        tools: @tool_classes,
        input: nil,
        params: {},
        metadata: {},
        tool_results: nil,
        streaming: !!@stream,
        callbacks: {
          on_content: on_content,
          on_reasoning: on_reasoning,
          on_tool_call: on_tool_call,
          on_tool_result: on_tool_result,
        },
      }
    end

    # Run a single user turn. Loops internally until the agent either
    # completes (no more tool calls) or hits a limit.
    #
    # Returns the final assistant response.
    def run(user_message)
      @request_count = 0

      # Build the initial prompt with system message on first turn
      input = if first_turn?
        @context.prompt do |p|
          p.system @system_prompt
          p.user user_message
        end
      else
        user_message
      end

      # --- First LLM call ---
      @env[:input] = input
      @env[:tool_results] = nil
      last_response = @pipeline.call(@env)
      sync_context!

      # --- Agent loop ---
      loop do
        break if @context.functions.empty?

        # Collect tool results.
        # Streaming: tools already spawned threads during the LLM response — just join them.
        # Non-streaming: execute manually (parallel or sequential).
        results = if @stream && !@stream.queue.empty?
          @context.wait(:thread)
        else
          execute_tool_calls
        end

        # Send results back through the pipeline
        @env[:input] = results
        @env[:tool_results] = extract_tool_result_pairs(results)
        last_response = @pipeline.call(@env)
        sync_context!

        @request_count += 1

        # Check limits
        break if @context.functions.empty?
        break if @request_count >= MAX_REQUESTS_PER_TURN
        break if @env[:metadata][:tool_error_limit_reached]
      end

      last_response
    end

    private

    # ------------------------------------------------------------------
    # Pipeline construction
    # ------------------------------------------------------------------

    def build_pipeline(compactor:, session:, logger:, reasoning:)
      sys_prompt = @system_prompt
      tools = @tool_classes

      Pipeline.new do
        # Outermost: timing and logging (sees total elapsed including retries)
        use Middleware::Tracing, logger: logger

        # Retry transient errors (wraps everything below)
        use Middleware::Retry

        # Save after each successful LLM call
        use Middleware::SessionPersistence, session: session

        # Track cumulative token usage
        use Middleware::TokenTracking

        # Check context size and compact if needed
        use Middleware::CompactionCheck,
            compactor: compactor,
            system_prompt: sys_prompt,
            tools: tools

        # Track per-tool errors
        use Middleware::ToolErrorTracking

        # Detect and break doom loops (pre-call)
        use Middleware::DoomLoopDetection

        # Handle reasoning params and model-switch normalization (pre-call)
        use Middleware::ReasoningNormalizer, **reasoning unless reasoning.empty?

        # Innermost: the actual LLM call
        run Middleware::LLMCall.new
      end
    end

    # ------------------------------------------------------------------
    # Tool execution
    # ------------------------------------------------------------------

    def execute_tool_calls
      pending = @context.functions.to_a
      return execute_sequential(pending) if pending.size <= 1

      execute_parallel(pending)
    end

    # Run a single tool call synchronously.
    def execute_sequential(functions)
      on_call = @env.dig(:callbacks, :on_tool_call)
      on_result = @env.dig(:callbacks, :on_tool_result)

      functions.map do |fn|
        on_call&.call(fn.name, fn.arguments)
        result = fn.call
        on_result&.call(fn.name, result_value(result))
        result
      end
    end

    # Run all pending tool calls concurrently via Async::Barrier.
    #
    # Each tool runs in its own fiber. File-mutating tools are safe because
    # they go through FileMutationQueue, whose Mutex is fiber-scheduler-aware
    # in Ruby 3.4 — a fiber blocked on a per-file mutex yields to other
    # fibers instead of blocking the thread.
    #
    # The barrier is stored in @barrier so abort! can cancel in-flight tools.
    #
    def execute_parallel(functions)
      on_call = @env.dig(:callbacks, :on_tool_call)
      on_result = @env.dig(:callbacks, :on_tool_result)

      results = Array.new(functions.size)

      Async do
        @barrier = Async::Barrier.new

        functions.each_with_index do |fn, i|
          @barrier.async do
            on_call&.call(fn.name, fn.arguments)
            result = fn.call
            results[i] = result
            # Report with fn.name (consistent with execute_sequential);
            # the result object is not guaranteed to respond to :name.
            on_result&.call(fn.name, result_value(result))
          end
        end

        @barrier.wait
      ensure
        @barrier&.stop
        @barrier = nil
      end

      results
    end

    public

    # Cancel any in-flight tool execution. Safe to call from a signal
    # handler, another thread, or an interface layer (TUI, web, RPC).
    #
    # When called, Async::Stop is raised in each running fiber, unwinding
    # through ensure blocks — so FileMutationQueue mutexes release cleanly
    # and SnapshotStore stays consistent.
    #
    def abort!
      @barrier&.stop
    end

    private

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    # After a pipeline call, the compaction middleware may have replaced
    # the context. Sync our local reference.
    def sync_context!
      @context = @env[:context]
    end

    def first_turn?
      @context.messages.to_a.empty?
    end

    # Unwrap a tool result to its raw value when it responds to #value.
    def result_value(result)
      result.respond_to?(:value) ? result.value : result
    end

    # Build [name, value] pairs from tool results for ToolErrorTracking.
    def extract_tool_result_pairs(results)
      results.filter_map do |r|
        name = r.respond_to?(:name) ? r.name : "unknown"
        val = result_value(r)
        [name, val]
      end
    end

    # Load AGENTS.md or .brute/rules from the working directory.
    def load_custom_rules
      candidates = [
        File.join(@cwd, "AGENTS.md"),
        File.join(@cwd, ".brute", "rules.md"),
      ]
      found = candidates.find { |p| File.exist?(p) }
      found ? File.read(found) : nil
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true

# Monkey-patch: Fix Anthropic tool result message role.
#
# llm.rb stores tool results as messages with role="tool" (via @llm.tool_role).
# Anthropic's API requires tool result messages to have role="user" with
# tool_result content blocks. The Completion adapter already correctly formats
# the content (Function::Return -> {type: "tool_result", ...}), but passes
# through the "tool" role unchanged — which Anthropic rejects.
#
# This patch overrides adapt_message to set role="user" when the message
# carries the "tool" role.

module Brute
  module Patches
    module AnthropicToolRole
      private

      # Rewrite role "tool" → "user"; defer to the adapter for all
      # other roles.
      def adapt_message
        return super unless message.respond_to?(:role) && message.role.to_s == "tool"

        {role: "user", content: adapt_content(content)}
      end

      # Apply the patch lazily — LLM::Anthropic is autoloaded.
      # Idempotent: subsequent calls are no-ops.
      def self.apply!
        return if @applied

        @applied = true
        LLM::Anthropic::RequestAdapter::Completion.prepend(self)
      end
    end
  end
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true

# Monkey-patch: Guard LLM::Buffer against nil entries.
#
# llm.rb's Context#talk can sometimes concatenate nil into the message
# buffer (e.g. when response parsing yields a nil choice). This causes
# NoMethodError when the buffer is iterated (assistant?, tool_return?, etc).
#
# This patch overrides concat to filter out nils before they enter the buffer.

module Brute
  module Patches
    module BufferNilGuard
      # Coerce the input to an Array, drop nils, then delegate.
      def concat(messages)
        sanitized = Array(messages).compact
        super(sanitized)
      end
    end
  end
end

LLM::Buffer.prepend(Brute::Patches::BufferNilGuard)
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Brute
  # Rack-style middleware pipeline for LLM calls.
  #
  # Each middleware wraps the next, forming an onion model:
  #
  #   Tracing → Retry → DoomLoop → Reasoning → [LLM Call] → Reasoning → DoomLoop → Retry → Tracing
  #
  # The innermost "app" is the actual LLM call. Each middleware can:
  #   - Modify the env (context, params) BEFORE the call (pre-processing)
  #   - Modify or inspect the response AFTER the call (post-processing)
  #   - Short-circuit (return without calling inner app)
  #   - Retry (call inner app multiple times)
  #
  # ## The env hash
  #
  #   {
  #     context: LLM::Context,   # conversation state
  #     provider: LLM::Provider, # the LLM provider
  #     input: <prompt/results>, # what to pass to context.talk()
  #     tools: [Tool, ...],      # tool classes
  #     params: {},              # extra LLM call params (reasoning config, etc.)
  #     metadata: {},            # shared scratchpad for middleware state
  #     callbacks: {},           # :on_content, :on_tool_call, :on_tool_result
  #   }
  #
  # ## The response
  #
  # The return value of call(env) is the LLM::Message from context.talk().
  #
  # ## Building a pipeline
  #
  #   pipeline = Brute::Pipeline.new do
  #     use Brute::Middleware::Tracing, logger: logger
  #     use Brute::Middleware::Retry, max_attempts: 3
  #     use Brute::Middleware::SessionPersistence, session: session
  #     run Brute::Middleware::LLMCall.new
  #   end
  #
  #   response = pipeline.call(env)
  #
  class Pipeline
    def initialize(&block)
      @middlewares = []
      @app = nil
      @chain = nil # memoized built chain; see #call
      instance_eval(&block) if block
    end

    # Register a middleware class.
    # The class must implement `initialize(app, *args, **kwargs)` and `call(env)`.
    # Invalidates any previously built chain.
    def use(klass, *args, **kwargs, &block)
      @middlewares << [klass, args, kwargs, block]
      @chain = nil
      self
    end

    # Set the terminal app (innermost handler).
    # Invalidates any previously built chain.
    def run(app)
      @app = app
      @chain = nil
      self
    end

    # Call the middleware chain, building it once and reusing it.
    #
    # Memoization matters for correctness, not just speed: stateful
    # middleware (TokenTracking's cumulative counters, ToolErrorTracking's
    # error tally, Tracing's call count) keep state in instance variables,
    # so re-instantiating the chain on every call would reset them.
    def call(env)
      (@chain ||= build).call(env)
    end

    # Build a fresh chain without calling it. Useful for inspection;
    # note this constructs new middleware instances each time.
    def build
      raise "Pipeline has no terminal app — call `run` first" unless @app

      @middlewares.reverse.inject(@app) do |inner, (klass, args, kwargs, block)|
        if block
          klass.new(inner, *args, **kwargs, &block)
        else
          klass.new(inner, *args, **kwargs)
        end
      end
    end
  end
end
|