brute 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/lib/brute/agent_stream.rb +49 -0
  3. data/lib/brute/compactor.rb +105 -0
  4. data/lib/brute/doom_loop.rb +84 -0
  5. data/lib/brute/file_mutation_queue.rb +99 -0
  6. data/lib/brute/hooks.rb +84 -0
  7. data/lib/brute/middleware/base.rb +27 -0
  8. data/lib/brute/middleware/compaction_check.rb +56 -0
  9. data/lib/brute/middleware/doom_loop_detection.rb +33 -0
  10. data/lib/brute/middleware/llm_call.rb +28 -0
  11. data/lib/brute/middleware/reasoning_normalizer.rb +98 -0
  12. data/lib/brute/middleware/retry.rb +45 -0
  13. data/lib/brute/middleware/session_persistence.rb +29 -0
  14. data/lib/brute/middleware/token_tracking.rb +46 -0
  15. data/lib/brute/middleware/tool_error_tracking.rb +46 -0
  16. data/lib/brute/middleware/tracing.rb +34 -0
  17. data/lib/brute/orchestrator.rb +297 -0
  18. data/lib/brute/patches/anthropic_tool_role.rb +35 -0
  19. data/lib/brute/patches/buffer_nil_guard.rb +21 -0
  20. data/lib/brute/pipeline.rb +81 -0
  21. data/lib/brute/session.rb +86 -0
  22. data/lib/brute/snapshot_store.rb +49 -0
  23. data/lib/brute/system_prompt.rb +88 -0
  24. data/lib/brute/todo_store.rb +27 -0
  25. data/lib/brute/tools/delegate.rb +35 -0
  26. data/lib/brute/tools/fs_patch.rb +37 -0
  27. data/lib/brute/tools/fs_read.rb +37 -0
  28. data/lib/brute/tools/fs_remove.rb +31 -0
  29. data/lib/brute/tools/fs_search.rb +38 -0
  30. data/lib/brute/tools/fs_undo.rb +29 -0
  31. data/lib/brute/tools/fs_write.rb +26 -0
  32. data/lib/brute/tools/net_fetch.rb +37 -0
  33. data/lib/brute/tools/shell.rb +38 -0
  34. data/lib/brute/tools/todo_read.rb +15 -0
  35. data/lib/brute/tools/todo_write.rb +32 -0
  36. data/lib/brute.rb +121 -0
  37. metadata +101 -0
@@ -0,0 +1,45 @@
1
# frozen_string_literal: true

module Brute
  module Middleware
    # Retries the inner call on transient LLM errors with exponential backoff.
    #
    # Catches LLM::RateLimitError and LLM::ServerError, sleeps with
    # exponential delay, and re-calls the inner app. Non-retryable errors
    # propagate immediately.
    #
    # Unlike forgecode's separate retry.rs, this middleware wraps the LLM call
    # directly — it sees the error and retries without the orchestrator knowing.
    #
    class Retry < Base
      DEFAULT_MAX_ATTEMPTS = 3
      DEFAULT_BASE_DELAY = 2 # seconds

      # @param app [#call] inner middleware/app to invoke
      # @param max_attempts [Integer] total tries before the error propagates
      # @param base_delay [Integer] backoff base; delay = base_delay ** attempt
      def initialize(app, max_attempts: DEFAULT_MAX_ATTEMPTS, base_delay: DEFAULT_BASE_DELAY)
        super(app)
        @max_attempts = max_attempts
        @base_delay = base_delay
      end

      # Invoke the inner app, retrying on transient provider errors.
      #
      # Before each sleep, the attempt number and computed delay are written
      # to env[:metadata]; when the retry budget is exhausted the last error
      # message is recorded and the exception re-raised unchanged.
      def call(env)
        attempt = 0

        loop do
          begin
            return @app.call(env)
          rescue LLM::RateLimitError, LLM::ServerError => e
            attempt += 1

            if attempt >= @max_attempts
              env[:metadata][:last_error] = e.message
              raise
            end

            # Exponential backoff: base_delay, base_delay², base_delay³, ...
            backoff = @base_delay ** attempt
            env[:metadata][:retry_attempt] = attempt
            env[:metadata][:retry_delay] = backoff
            sleep(backoff)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
# frozen_string_literal: true

module Brute
  module Middleware
    # Saves the conversation to disk after each LLM call.
    #
    # Runs POST-call: delegates to Session#save. Failures are non-fatal —
    # a broken session save should never crash the agent loop.
    #
    class SessionPersistence < Base
      # @param app [#call] inner middleware/app
      # @param session [Session] receives #save(context) after every call
      def initialize(app, session:)
        super(app)
        @session = session
      end

      # Forward to the inner app, then best-effort persist the context.
      # Always returns the inner app's response, even when saving fails.
      def call(env)
        response = @app.call(env)
        persist(env[:context])
        response
      end

      private

      # Save the context via the session; swallow StandardError and warn,
      # because persistence problems must never interrupt the agent loop.
      def persist(context)
        @session.save(context)
      rescue => e
        warn "[brute] Session save failed: #{e.message}"
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
# frozen_string_literal: true

module Brute
  module Middleware
    # Tracks cumulative token usage across all LLM calls in a session.
    #
    # Runs POST-call: reads usage from the response and accumulates totals
    # in env[:metadata]. Also records per-call usage for the most recent call.
    #
    class TokenTracking < Base
      def initialize(app)
        super(app)
        # Running totals across every call this instance has seen.
        @totals = {input: 0, output: 0, reasoning: 0, calls: 0}
      end

      # Invoke the inner app, then fold its usage (when present) into the
      # running totals. Responses without usage are passed through untouched.
      def call(env)
        response = @app.call(env)
        usage = response.respond_to?(:usage) ? response.usage : nil
        record(env, usage) if usage
        response
      end

      private

      # Accumulate one call's usage and publish the full snapshot under
      # env[:metadata][:tokens], including per-call figures for this call.
      def record(env, usage)
        input = usage.input_tokens.to_i
        output = usage.output_tokens.to_i

        @totals[:input] += input
        @totals[:output] += output
        @totals[:reasoning] += usage.reasoning_tokens.to_i
        @totals[:calls] += 1

        env[:metadata][:tokens] = {
          total_input: @totals[:input],
          total_output: @totals[:output],
          total_reasoning: @totals[:reasoning],
          total: @totals[:input] + @totals[:output],
          call_count: @totals[:calls],
          last_call: {
            input: input,
            output: output,
            total: usage.total_tokens.to_i,
          },
        }
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
# frozen_string_literal: true

module Brute
  module Middleware
    # Tracks per-tool error counts across LLM calls and signals when
    # the error ceiling is reached.
    #
    # This middleware doesn't execute tools itself — it inspects the tool
    # results that were sent as input to the LLM call (env[:tool_results])
    # and counts failures.
    #
    # When any tool exceeds max_failures, it sets env[:metadata][:tool_error_limit_reached]
    # so the orchestrator can decide to stop.
    #
    class ToolErrorTracking < Base
      DEFAULT_MAX_FAILURES = 3

      # @param app [#call] inner middleware/app
      # @param max_failures [Integer] per-tool failure ceiling
      def initialize(app, max_failures: DEFAULT_MAX_FAILURES)
        super(app)
        @max_failures = max_failures
        @errors = Hash.new(0) # tool_name → count
      end

      # PRE-call: tally failures from the outgoing tool results, publish the
      # current counts and the limit flag, then invoke the inner app.
      def call(env)
        tally_failures(env[:tool_results])

        env[:metadata][:tool_errors] = @errors.dup
        env[:metadata][:tool_error_limit_reached] =
          @errors.values.any? { |count| count >= @max_failures }

        @app.call(env)
      end

      # Reset error counts (e.g., between user turns).
      def reset!
        @errors.clear
      end

      private

      # Count each [name, result] pair whose result is a Hash flagged with
      # :error. Nil results (no tool round-trip this call) are a no-op.
      def tally_failures(results)
        return unless results

        results.each do |name, value|
          @errors[name] += 1 if value.is_a?(Hash) && value[:error]
        end
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
# frozen_string_literal: true

module Brute
  module Middleware
    # Logs timing and token usage for every LLM call.
    #
    # Wraps the call with wall-clock timing. Logs:
    #   PRE:  request number, message count
    #   POST: elapsed time, token usage, finish reason
    #
    class Tracing < Base
      # @param app [#call] inner middleware/app
      # @param logger [Logger] receives debug/info lines
      def initialize(app, logger:)
        super(app)
        @logger = logger
        @call_count = 0
      end

      # Time the inner call with a monotonic clock (immune to wall-clock
      # adjustments) and log before/after. Returns the response unchanged.
      def call(env)
        @call_count += 1
        log_request(env)

        started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        response = @app.call(env)
        elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

        log_response(response, elapsed)
        response
      end

      private

      # PRE line: sequence number and how many messages are in the context.
      def log_request(env)
        message_count = env[:context].messages.to_a.size
        @logger.debug("[brute] LLM call ##{@call_count} (#{message_count} messages in context)")
      end

      # POST line: token total (or "?" when the response has no usage API).
      def log_response(response, elapsed)
        tokens = "?"
        tokens = response.usage&.total_tokens if response.respond_to?(:usage)
        @logger.info("[brute] LLM response ##{@call_count}: #{tokens} tokens, #{elapsed.round(2)}s")
      end
    end
  end
end
@@ -0,0 +1,297 @@
1
# frozen_string_literal: true

require "async"
require "async/barrier"

module Brute
  # The core agent loop. Drives the cycle of:
  #
  #   prompt → LLM → tool calls → execute → send results → repeat
  #
  # All cross-cutting concerns (retry, compaction, doom loop detection,
  # token tracking, session persistence, tracing, reasoning) are implemented
  # as Rack-style middleware in the Pipeline. The orchestrator is now a
  # thin loop that:
  #
  #   1. Sends input through the pipeline (which wraps the LLM call)
  #   2. Executes any tool calls the LLM requested
  #   3. Repeats until done or a limit is hit
  #
  class Orchestrator
    MAX_REQUESTS_PER_TURN = 100

    attr_reader :context, :session, :pipeline, :env, :barrier

    # @param provider [LLM::Provider] the LLM provider
    # @param tools [Array<Class>] tool classes exposed to the model
    # @param cwd [String] working directory for prompts and custom rules
    # @param session [Session, nil] persistence target (defaults to a new one)
    # @param compactor_opts [Hash] forwarded to Compactor.new
    # @param reasoning [Hash] forwarded to Middleware::ReasoningNormalizer
    # @param on_content,on_reasoning,on_tool_call,on_tool_result [Proc, nil]
    #   optional streaming callbacks; content/reasoning enable streaming mode
    # @param logger [Logger, nil] defaults to INFO on $stderr
    def initialize(
      provider:,
      tools: Brute::TOOLS,
      cwd: Dir.pwd,
      session: nil,
      compactor_opts: {},
      reasoning: {},
      on_content: nil,
      on_reasoning: nil,
      on_tool_call: nil,
      on_tool_result: nil,
      logger: nil
    )
      @provider = provider
      @tool_classes = tools
      @cwd = cwd
      @session = session || Session.new
      @logger = logger || Logger.new($stderr, level: Logger::INFO)

      # Build system prompt (AGENTS.md / .brute/rules.md appended if present)
      custom_rules = load_custom_rules
      prompt_builder = SystemPrompt.new(cwd: @cwd, tools: @tool_classes, custom_rules: custom_rules)
      @system_prompt = prompt_builder.build

      # Initialize the LLM context (with streaming when callbacks provided)
      @stream = if on_content || on_reasoning
        AgentStream.new(
          on_content: on_content,
          on_reasoning: on_reasoning,
          on_tool_call: on_tool_call,
          on_tool_result: on_tool_result,
        )
      end
      @context = LLM::Context.new(@provider, tools: @tool_classes,
                                  **(@stream ? {stream: @stream} : {}))

      # Build the middleware pipeline
      compactor = Compactor.new(provider, **compactor_opts)
      @pipeline = build_pipeline(
        compactor: compactor,
        session: @session,
        logger: @logger,
        reasoning: reasoning,
      )

      # The shared env hash — passed to every pipeline.call()
      @env = {
        context: @context,
        provider: @provider,
        tools: @tool_classes,
        input: nil,
        params: {},
        metadata: {},
        tool_results: nil,
        streaming: !!@stream,
        callbacks: {
          on_content: on_content,
          on_reasoning: on_reasoning,
          on_tool_call: on_tool_call,
          on_tool_result: on_tool_result,
        },
      }
    end

    # Run a single user turn. Loops internally until the agent either
    # completes (no more tool calls) or hits a limit.
    #
    # Returns the final assistant response.
    def run(user_message)
      @request_count = 0

      # Build the initial prompt with system message on first turn
      input = if first_turn?
        @context.prompt do |p|
          p.system @system_prompt
          p.user user_message
        end
      else
        user_message
      end

      # --- First LLM call ---
      @env[:input] = input
      @env[:tool_results] = nil
      last_response = @pipeline.call(@env)
      sync_context!

      # --- Agent loop ---
      loop do
        break if @context.functions.empty?

        # Collect tool results.
        # Streaming: tools already spawned threads during the LLM response — just join them.
        # Non-streaming: execute manually (parallel or sequential).
        results = if @stream && !@stream.queue.empty?
          @context.wait(:thread)
        else
          execute_tool_calls
        end

        # Send results back through the pipeline
        @env[:input] = results
        @env[:tool_results] = extract_tool_result_pairs(results)
        last_response = @pipeline.call(@env)
        sync_context!

        @request_count += 1

        # Check limits
        break if @context.functions.empty?
        break if @request_count >= MAX_REQUESTS_PER_TURN
        break if @env[:metadata][:tool_error_limit_reached]
      end

      last_response
    end

    private

    # ------------------------------------------------------------------
    # Pipeline construction
    # ------------------------------------------------------------------

    # Assemble the middleware onion, outermost first. Locals capture
    # instance state because Pipeline.new instance_evals the block.
    def build_pipeline(compactor:, session:, logger:, reasoning:)
      sys_prompt = @system_prompt
      tools = @tool_classes

      Pipeline.new do
        # Outermost: timing and logging (sees total elapsed including retries)
        use Middleware::Tracing, logger: logger

        # Retry transient errors (wraps everything below)
        use Middleware::Retry

        # Save after each successful LLM call
        use Middleware::SessionPersistence, session: session

        # Track cumulative token usage
        use Middleware::TokenTracking

        # Check context size and compact if needed
        use Middleware::CompactionCheck,
            compactor: compactor,
            system_prompt: sys_prompt,
            tools: tools

        # Track per-tool errors
        use Middleware::ToolErrorTracking

        # Detect and break doom loops (pre-call)
        use Middleware::DoomLoopDetection

        # Handle reasoning params and model-switch normalization (pre-call)
        use Middleware::ReasoningNormalizer, **reasoning unless reasoning.empty?

        # Innermost: the actual LLM call
        run Middleware::LLMCall.new
      end
    end

    # ------------------------------------------------------------------
    # Tool execution
    # ------------------------------------------------------------------

    # Single call → synchronous path; multiple calls → concurrent fibers.
    def execute_tool_calls
      pending = @context.functions.to_a
      return execute_sequential(pending) if pending.size <= 1

      execute_parallel(pending)
    end

    # Run a single tool call synchronously.
    def execute_sequential(functions)
      on_call = @env.dig(:callbacks, :on_tool_call)
      on_result = @env.dig(:callbacks, :on_tool_result)

      functions.map do |fn|
        on_call&.call(fn.name, fn.arguments)
        result = fn.call
        on_result&.call(fn.name, result_value(result))
        result
      end
    end

    # Run all pending tool calls concurrently via Async::Barrier.
    #
    # Each tool runs in its own fiber. File-mutating tools are safe because
    # they go through FileMutationQueue, whose Mutex is fiber-scheduler-aware
    # in Ruby 3.4 — a fiber blocked on a per-file mutex yields to other
    # fibers instead of blocking the thread.
    #
    # The barrier is stored in @barrier so abort! can cancel in-flight tools.
    #
    def execute_parallel(functions)
      on_call = @env.dig(:callbacks, :on_tool_call)
      on_result = @env.dig(:callbacks, :on_tool_result)

      results = Array.new(functions.size)

      Async do
        @barrier = Async::Barrier.new

        functions.each_with_index do |fn, i|
          @barrier.async do
            on_call&.call(fn.name, fn.arguments)
            results[i] = fn.call
            # Use fn.name, matching execute_sequential — result objects are
            # not guaranteed to respond to :name (extract_tool_result_pairs
            # has to guard with respond_to? for exactly that reason).
            on_result&.call(fn.name, result_value(results[i]))
          end
        end

        @barrier.wait
      ensure
        @barrier&.stop
        @barrier = nil
      end

      results
    end

    public

    # Cancel any in-flight tool execution. Safe to call from a signal
    # handler, another thread, or an interface layer (TUI, web, RPC).
    #
    # When called, Async::Stop is raised in each running fiber, unwinding
    # through ensure blocks — so FileMutationQueue mutexes release cleanly
    # and SnapshotStore stays consistent.
    #
    def abort!
      @barrier&.stop
    end

    private

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    # After a pipeline call, the compaction middleware may have replaced
    # the context. Sync our local reference.
    def sync_context!
      @context = @env[:context]
    end

    # True before any message has entered the context (first user turn).
    def first_turn?
      @context.messages.to_a.empty?
    end

    # Unwrap a tool result to its payload when it exposes #value.
    def result_value(result)
      result.respond_to?(:value) ? result.value : result
    end

    # Build [name, value] pairs from tool results for ToolErrorTracking.
    def extract_tool_result_pairs(results)
      results.filter_map do |r|
        name = r.respond_to?(:name) ? r.name : "unknown"
        val = result_value(r)
        [name, val]
      end
    end

    # Load AGENTS.md or .brute/rules from the working directory.
    # Returns the file contents, or nil when neither candidate exists.
    def load_custom_rules
      candidates = [
        File.join(@cwd, "AGENTS.md"),
        File.join(@cwd, ".brute", "rules.md"),
      ]
      found = candidates.find { |p| File.exist?(p) }
      found ? File.read(found) : nil
    end
  end
end
@@ -0,0 +1,35 @@
1
# frozen_string_literal: true

# Monkey-patch: Fix Anthropic tool result message role.
#
# llm.rb stores tool results as messages with role="tool" (via @llm.tool_role).
# Anthropic's API requires tool result messages to have role="user" with
# tool_result content blocks. The Completion adapter already correctly formats
# the content (Function::Return -> {type: "tool_result", ...}), but passes
# through the "tool" role unchanged — which Anthropic rejects.
#
# This patch overrides adapt_message to set role="user" when the message
# content contains tool returns.

module Brute
  module Patches
    module AnthropicToolRole
      private

      # Rewrite role "tool" to "user" so Anthropic accepts the message;
      # any other role falls through to the adapter's own implementation.
      #
      # NOTE(review): `message`, `content`, and `adapt_content` are expected
      # to come from the prepended Completion adapter — confirm `content`
      # is the tool-result payload belonging to `message`.
      def adapt_message
        if message.respond_to?(:role) && message.role.to_s == "tool"
          {role: "user", content: adapt_content(content)}
        else
          super
        end
      end

      # Apply the patch lazily — LLM::Anthropic is autoloaded.
      #
      # Although defined below the `private` marker, `def self.` methods are
      # not affected by it, so apply! remains publicly callable. Idempotent
      # via @applied.
      # NOTE(review): @applied is set before the prepend — if the constant
      # lookup raises (adapter not yet loadable), later calls silently no-op;
      # verify callers invoke this only once the Anthropic adapter can load.
      def self.apply!
        return if @applied
        @applied = true
        LLM::Anthropic::RequestAdapter::Completion.prepend(self)
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
# frozen_string_literal: true

# Monkey-patch: Guard LLM::Buffer against nil entries.
#
# llm.rb's Context#talk can sometimes concatenate nil into the message
# buffer (e.g. when response parsing yields a nil choice). This causes
# NoMethodError when the buffer is iterated (assistant?, tool_return?, etc).
#
# This patch overrides concat to filter out nils before they enter the buffer.

module Brute
  module Patches
    module BufferNilGuard
      # Drop nil entries before delegating to LLM::Buffer#concat.
      #
      # Array() also normalizes nil (→ []) and a single message (→ [msg]),
      # so scalars survive this wrapper.
      # NOTE(review): assumes the upstream concat accepts an Array argument —
      # confirm against llm.rb's Buffer API.
      def concat(messages)
        super(Array(messages).compact)
      end
    end
  end
end

# Applied eagerly at load time — LLM::Buffer must already be defined when
# this file is required.
LLM::Buffer.prepend(Brute::Patches::BufferNilGuard)
@@ -0,0 +1,81 @@
1
# frozen_string_literal: true

module Brute
  # Rack-style middleware pipeline for LLM calls.
  #
  # Each middleware wraps the next, forming an onion model:
  #
  #   Tracing → Retry → DoomLoop → Reasoning → [LLM Call] → Reasoning → DoomLoop → Retry → Tracing
  #
  # The innermost "app" is the actual LLM call. Each middleware can:
  #   - Modify the env (context, params) BEFORE the call (pre-processing)
  #   - Modify or inspect the response AFTER the call (post-processing)
  #   - Short-circuit (return without calling inner app)
  #   - Retry (call inner app multiple times)
  #
  # ## The env hash
  #
  #   {
  #     context:  LLM::Context,    # conversation state
  #     provider: LLM::Provider,   # the LLM provider
  #     input:    <prompt/results>,# what to pass to context.talk()
  #     tools:    [Tool, ...],     # tool classes
  #     params:   {},              # extra LLM call params (reasoning config, etc.)
  #     metadata: {},              # shared scratchpad for middleware state
  #     callbacks: {},             # :on_content, :on_tool_call, :on_tool_result
  #   }
  #
  # ## The response
  #
  # The return value of call(env) is the LLM::Message from context.talk().
  #
  # ## Building a pipeline
  #
  #   pipeline = Brute::Pipeline.new do
  #     use Brute::Middleware::Tracing, logger: logger
  #     use Brute::Middleware::Retry, max_attempts: 3
  #     use Brute::Middleware::SessionPersistence, session: session
  #     run Brute::Middleware::LLMCall.new
  #   end
  #
  #   response = pipeline.call(env)
  #
  class Pipeline
    def initialize(&block)
      @middlewares = []
      @app = nil
      @chain = nil # memoized built chain; see #call
      instance_eval(&block) if block
    end

    # Register a middleware class.
    # The class must implement `initialize(app, *args, **kwargs)` and `call(env)`.
    # Invalidates any cached chain so the next call rebuilds it.
    def use(klass, *args, **kwargs, &block)
      @chain = nil
      @middlewares << [klass, args, kwargs, block]
      self
    end

    # Set the terminal app (innermost handler).
    # Invalidates any cached chain so the next call rebuilds it.
    def run(app)
      @chain = nil
      @app = app
      self
    end

    # Call the middleware chain.
    #
    # The chain is built ONCE and memoized. Rebuilding per call would
    # construct fresh middleware instances every time, silently resetting
    # stateful middleware (cumulative token totals, per-tool error counts,
    # retry/trace counters) between calls.
    def call(env)
      build.call(env)
    end

    # Build (and cache) the chain without calling it. Useful for inspection.
    # Raises if no terminal app has been registered via `run`.
    def build
      @chain ||= begin
        raise "Pipeline has no terminal app — call `run` first" unless @app

        # Fold right-to-left: the last-registered middleware wraps @app,
        # the first-registered one ends up outermost.
        @middlewares.reverse.inject(@app) do |inner, (klass, args, kwargs, block)|
          if block
            klass.new(inner, *args, **kwargs, &block)
          else
            klass.new(inner, *args, **kwargs)
          end
        end
      end
    end
  end
end
+ end