brute 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/brute/agent_stream.rb +49 -0
- data/lib/brute/compactor.rb +105 -0
- data/lib/brute/doom_loop.rb +84 -0
- data/lib/brute/file_mutation_queue.rb +99 -0
- data/lib/brute/hooks.rb +84 -0
- data/lib/brute/middleware/base.rb +27 -0
- data/lib/brute/middleware/compaction_check.rb +56 -0
- data/lib/brute/middleware/doom_loop_detection.rb +33 -0
- data/lib/brute/middleware/llm_call.rb +28 -0
- data/lib/brute/middleware/reasoning_normalizer.rb +98 -0
- data/lib/brute/middleware/retry.rb +45 -0
- data/lib/brute/middleware/session_persistence.rb +29 -0
- data/lib/brute/middleware/token_tracking.rb +46 -0
- data/lib/brute/middleware/tool_error_tracking.rb +46 -0
- data/lib/brute/middleware/tracing.rb +34 -0
- data/lib/brute/orchestrator.rb +297 -0
- data/lib/brute/patches/anthropic_tool_role.rb +35 -0
- data/lib/brute/patches/buffer_nil_guard.rb +21 -0
- data/lib/brute/pipeline.rb +81 -0
- data/lib/brute/session.rb +86 -0
- data/lib/brute/snapshot_store.rb +49 -0
- data/lib/brute/system_prompt.rb +88 -0
- data/lib/brute/todo_store.rb +27 -0
- data/lib/brute/tools/delegate.rb +35 -0
- data/lib/brute/tools/fs_patch.rb +37 -0
- data/lib/brute/tools/fs_read.rb +37 -0
- data/lib/brute/tools/fs_remove.rb +31 -0
- data/lib/brute/tools/fs_search.rb +38 -0
- data/lib/brute/tools/fs_undo.rb +29 -0
- data/lib/brute/tools/fs_write.rb +26 -0
- data/lib/brute/tools/net_fetch.rb +37 -0
- data/lib/brute/tools/shell.rb +38 -0
- data/lib/brute/tools/todo_read.rb +15 -0
- data/lib/brute/tools/todo_write.rb +32 -0
- data/lib/brute.rb +121 -0
- metadata +101 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: d81acc813055cd8621c71b3d1c444b9c39cf9c316b0e4b41eddab60a3f27f85a
|
|
4
|
+
data.tar.gz: 746bd2d574c6203e80153d0c747e55cecff746b5aa61ae1b38c9471aaee8feef
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: c18662ee0a25508ebd7df4015454b4b6e77eb7fa7e5a3f69fe2b9b1e64a8f0392fd1478cde5e5f310c85e1a683302346cc0528cb20beab80705029331ed7e4e7
|
|
7
|
+
data.tar.gz: 60047800ecee00b2c959ca9f12385540b2be949ebfffa3f16312dc534a0342a82e5bebe0227228461e2fcba634a76a38df7d3c70bcada0c40a3be37f82c156c0
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Brute
  # Bridges llm.rb's streaming callbacks to forge-rb's callback system.
  #
  # Text and reasoning chunks fire immediately as the LLM generates them.
  # Tool calls spawn threads on arrival — tools start running while the
  # response is still streaming. on_tool_result fires as each thread finishes.
  #
  class AgentStream < LLM::Stream
    # All callbacks are optional; nil callbacks are skipped silently.
    #
    # @param on_content [Proc, nil] receives each streamed text chunk
    # @param on_reasoning [Proc, nil] receives each streamed reasoning chunk
    # @param on_tool_call [Proc, nil] receives (tool_name, arguments) when a call arrives
    # @param on_tool_result [Proc, nil] receives (tool_name, result) when a tool finishes
    def initialize(on_content: nil, on_reasoning: nil, on_tool_call: nil, on_tool_result: nil)
      @on_content = on_content
      @on_reasoning = on_reasoning
      @on_tool_call = on_tool_call
      @on_tool_result = on_tool_result
    end

    # Streamed assistant text chunk.
    def on_content(text)
      @on_content&.call(text)
    end

    # Streamed reasoning/thinking chunk.
    def on_reasoning_content(text)
      @on_reasoning&.call(text)
    end

    # Invoked by the stream when a tool call arrives. `error` is non-nil when
    # the call already failed (e.g. malformed arguments); the error is then
    # queued as the result instead of executing the tool.
    def on_tool_call(tool, error)
      @on_tool_call&.call(tool.name, tool.arguments)

      if error
        queue << error
        # FIX: unwrap the payload the same way spawn_with_callback unwraps
        # results — the original called error.value unconditionally, which
        # raises NoMethodError for error objects that don't expose #value.
        @on_tool_result&.call(tool.name, error.respond_to?(:value) ? error.value : error)
      else
        queue << LLM::Function::Task.new(spawn_with_callback(tool))
      end
    end

    private

    # Run the tool on its own thread so execution overlaps with streaming,
    # firing on_tool_result from inside the thread as it completes.
    # Callback state is captured in locals so the thread body is self-contained.
    def spawn_with_callback(tool)
      on_result = @on_tool_result
      name = tool.name
      Thread.new do
        result = tool.call
        on_result&.call(name, result.respond_to?(:value) ? result.value : result)
        result
      end
    end
  end
end
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Brute
  # Context compaction service. Once the conversation crosses configurable
  # thresholds, the older portion of the history is summarized into a single
  # condensed message and dropped, keeping the context window manageable.
  #
  # Modeled after forgecode's Compactor (eviction window + retention window).
  class Compactor
    DEFAULTS = {
      token_threshold: 100_000, # Compact when estimated tokens exceed this
      message_threshold: 200, # Compact when message count exceeds this
      retention_window: 6, # Minimum recent messages to always keep
      summary_model: nil, # Model for summarization (uses agent's model if nil)
    }.freeze

    attr_reader :config

    # @param provider [#complete] LLM provider used for summarization
    # @param opts [Hash] overrides for DEFAULTS
    def initialize(provider, **opts)
      @provider = provider
      @config = DEFAULTS.merge(opts)
    end

    # True when either the message count or the reported token usage
    # exceeds its configured threshold.
    def should_compact?(messages, usage: nil)
      too_many_messages = messages.size > @config[:message_threshold]
      too_many_tokens = usage ? (usage.total_tokens || 0) > @config[:token_threshold] : false
      too_many_messages || too_many_tokens
    end

    # Summarize everything older than the retention window.
    #
    # @return [Array(String, Array), nil] [summary_text, kept_messages],
    #   or nil when there is nothing old enough to fold away.
    def compact(messages)
      retained = [@config[:retention_window], messages.size].min
      split_at = messages.size - retained
      return nil unless split_at.positive?

      [summarize(messages.take(split_at)), messages.drop(split_at)]
    end

    private

    # Ask the provider for a structured summary of the given messages.
    def summarize(messages)
      transcript = messages.map { |msg| render_line(msg) }.join("\n---\n")

      prompt = <<~PROMPT
        Summarize this conversation history for context continuity. The summary will replace
        these messages in the context window, so include everything the agent needs to continue
        working effectively.

        Structure your summary as:
        ## Goal
        What the user asked for.

        ## Progress
        - Files read, created, or modified (list paths)
        - Commands executed and their outcomes
        - Key decisions made

        ## Current State
        Where things stand right now — what's done and what remains.

        ## Next Steps
        What should happen next based on the conversation.

        ---
        CONVERSATION:
        #{transcript}
      PROMPT

      model = @config[:summary_model] || "claude-sonnet-4-20250514"
      @provider.complete(prompt, model: model).content
    end

    # One condensed transcript line per message: "role:[tools] content".
    # Duck-typed — works with any object; content is truncated to 1001 chars.
    def render_line(msg)
      role = msg.respond_to?(:role) ? msg.role.to_s : "unknown"
      content = (msg.respond_to?(:content) ? msg.content : msg).to_s[0..1000]

      tool_info = ""
      if msg.respond_to?(:functions) && msg.functions&.any?
        rendered = msg.functions.map { |fn| "#{fn.name}(#{fn.arguments.to_s[0..200]})" }
        tool_info = " [tools: #{rendered.join(", ")}]"
      end

      "#{role}:#{tool_info} #{content}"
    end
  end
end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Brute
  # Detects when the agent is stuck repeating the same tool calls.
  #
  # Two loop shapes are recognized:
  # 1. Consecutive identical calls: [A, A, A] — same tool + same args
  # 2. Repeating sequences: [A,B,C, A,B,C, A,B,C] — a cycling pattern
  #
  # On detection the caller injects a warning into the context so the
  # LLM can course-correct.
  class DoomLoopDetector
    DEFAULT_THRESHOLD = 3

    attr_reader :threshold

    def initialize(threshold: DEFAULT_THRESHOLD)
      @threshold = threshold
    end

    # Scan the message buffer for a repeating tool-call pattern at the tail.
    #
    # @return [Integer, nil] repetition count when a loop is found, else nil
    def detect(messages)
      calls = extract_signatures(messages)
      return nil if calls.size < @threshold

      check_repeating_pattern(calls)
    end

    # Human-readable warning to feed back to the agent.
    def warning_message(repetitions)
      <<~MSG
        SYSTEM NOTICE: Doom loop detected — the same tool call pattern has repeated #{repetitions} times.
        You are stuck in a loop and not making progress. Stop and try a fundamentally different approach:
        - Re-read the file to check your changes actually applied
        - Try a different tool or strategy
        - Break the problem into smaller steps
        - If a command keeps failing, investigate why before retrying
      MSG
    end

    private

    # [tool_name, arguments_json] pairs from assistant messages, in order.
    def extract_signatures(messages)
      assistant_messages = messages.select do |msg|
        msg.respond_to?(:functions) && msg.assistant?
      end
      assistant_messages.flat_map do |msg|
        msg.functions.map { |fn| [fn.name.to_s, fn.arguments.to_s] }
      end
    end

    # Try every candidate pattern length; report the first one that repeats
    # at least @threshold times at the tail of the sequence.
    def check_repeating_pattern(sequence)
      longest = sequence.size / @threshold

      (1..longest).each do |pattern_len|
        reps = count_tail_repetitions(sequence, pattern_len)
        return reps if reps >= @threshold
      end

      nil
    end

    # How many back-to-back copies of the final `length`-sized window end
    # the sequence (always at least 1 when the sequence is long enough).
    def count_tail_repetitions(sequence, length)
      return 0 if sequence.size < length

      tail = sequence.last(length)
      reps = 1
      cursor = sequence.size - length

      while cursor >= length && sequence[(cursor - length)...cursor] == tail
        reps += 1
        cursor -= length
      end

      reps
    end
  end
end
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Brute
  # Per-file serialization queue for concurrent tool execution.
  #
  # Parallel tools (threads or async fibers) may target the same file at
  # once; without serialization a [read → patch → write] sequence on one
  # path would race and lose edits. Usage:
  #
  #   Brute::FileMutationQueue.serialize("/path/to/file") do
  #     # snapshot + read + modify + write — atomic for this path
  #   end
  #
  # Properties (mirrors pi-mono's withFileMutationQueue):
  # - Same file: operations run one at a time
  # - Different files: fully parallel, independent mutexes
  # - Symlink-aware: real paths are resolved so aliases share one mutex
  # - Error-safe: the mutex bookkeeping is released in `ensure`
  # - Self-cleaning: a path's mutex is dropped once nobody holds or waits
  #
  # Ruby 3.4's Mutex is fiber-scheduler-aware, so this behaves correctly
  # under both :thread and :task (Async) concurrency strategies.
  module FileMutationQueue
    @mutexes = {} # canonical path → Mutex
    @waiters = Hash.new(0) # canonical path → holders + waiters count
    @guard = Mutex.new # protects @mutexes and @waiters

    class << self
      # Run a block of mutation work serialized on a file path.
      #
      # Concurrent calls for the same canonical path execute one at a
      # time; calls for different paths never contend.
      #
      # @param path [String] the file path to serialize on
      # @yield the mutation work (snapshot, read, write, etc.)
      # @return whatever the block returns
      def serialize(path, &block)
        key = canonical_path(path)
        lock = checkout(key)

        lock.synchronize(&block)
      ensure
        checkin(key)
      end

      # Drop all tracked mutexes. Used in tests and session resets.
      def clear!
        @guard.synchronize { [@mutexes, @waiters].each(&:clear) }
      end

      # How many file paths are currently tracked (diagnostics only).
      def size
        @guard.synchronize { @mutexes.length }
      end

      private

      # Canonical key for a path: realpath when the file exists (so symlink
      # aliases collapse onto one mutex), otherwise the expanded path
      # (covers brand-new files about to be written).
      def canonical_path(path)
        absolute = File.expand_path(path)
        File.realpath(absolute)
      rescue Errno::ENOENT
        absolute
      end

      # Fetch-or-create the mutex for a key and bump its usage count.
      def checkout(key)
        @guard.synchronize do
          @waiters[key] += 1
          @mutexes[key] ||= Mutex.new
        end
      end

      # Drop one usage; when nobody holds or waits anymore, forget the key.
      def checkin(key)
        @guard.synchronize do
          remaining = (@waiters[key] -= 1)
          next if remaining.positive?

          @mutexes.delete(key)
          @waiters.delete(key)
        end
      end
    end
  end
end
|
data/lib/brute/hooks.rb
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Brute
  # Lifecycle hook system modeled after forgecode's Hook struct.
  #
  # Six lifecycle events fire during the orchestrator loop:
  #   :start          — conversation processing begins
  #   :end            — conversation processing ends
  #   :request        — before each LLM API call
  #   :response       — after each LLM response
  #   :toolcall_start — before a tool executes
  #   :toolcall_end   — after a tool executes
  #
  # A hook receives (event_name, **context) and can inspect or mutate
  # orchestrator state through the keyword data it is handed.
  module Hooks
    # Base class. Subclass and override the #on_<event> methods you need.
    class Base
      # Dispatch an event to the matching on_<event> handler.
      # Unknown events are ignored (the private handler lookup fails closed).
      def call(event, **data)
        handler = :"on_#{event}"
        return unless respond_to?(handler, true)

        send(handler, **data)
      end

      private

      # No-op defaults so subclasses only override what they care about.
      def on_start(**) = nil
      def on_end(**) = nil
      def on_request(**) = nil
      def on_response(**) = nil
      def on_toolcall_start(**) = nil
      def on_toolcall_end(**) = nil
    end

    # Fans one event out to several hooks, in registration order.
    class Composite < Base
      def initialize(*hooks)
        @hooks = hooks
      end

      def call(event, **data)
        @hooks.each do |hook|
          hook.call(event, **data)
        end
      end

      # Append a hook; returns self so registrations can be chained.
      def <<(hook)
        @hooks.push(hook)
        self
      end
    end

    # Writes lifecycle events to the given logger.
    class Logging < Base
      def initialize(logger)
        @logger = logger
      end

      private

      def on_start(**)
        @logger.info("[brute] Conversation started")
      end

      def on_end(**)
        @logger.info("[brute] Conversation ended")
      end

      def on_request(request_count: 0, **)
        @logger.debug("[brute] LLM request ##{request_count}")
      end

      def on_response(tokens: nil, **)
        @logger.debug("[brute] LLM response (tokens: #{tokens || "?"})")
      end

      def on_toolcall_start(tool_name: nil, **)
        @logger.info("[brute] Tool call: #{tool_name}")
      end

      def on_toolcall_end(tool_name: nil, error: false, **)
        outcome = error ? "FAILED" : "ok"
        @logger.info("[brute] Tool result: #{tool_name} [#{outcome}]")
      end
    end
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Brute
  module Middleware
    # Base class for all middleware, following the Rack convention:
    #
    #   def call(env)
    #     # pre-processing
    #     response = @app.call(env)
    #     # post-processing
    #     response
    #   end
    #
    # Subclasses MUST invoke @app.call(env) unless they intentionally
    # short-circuit (e.g., serve a cached response).
    class Base
      # @param app [#call] the next middleware (or terminal app) in the chain
      def initialize(app)
        @app = app
      end

      # Default behavior: pass the env straight through.
      def call(env) = @app.call(env)
    end
  end
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Brute
  module Middleware
    # Checks context size after each LLM call and triggers compaction
    # when thresholds are exceeded.
    #
    # Runs POST-call: inspects message count and token usage from the
    # response. If compaction is needed, summarizes older messages and
    # rebuilds the context with the summary + recent messages.
    #
    class CompactionCheck < Base
      # @param app [#call] next middleware in the chain
      # @param compactor [Brute::Compactor] decides when/what to compact
      # @param system_prompt [String] system prompt replayed into the rebuilt context
      # @param tools [Array] tool set to re-register on the rebuilt context
      def initialize(app, compactor:, system_prompt:, tools:)
        super(app)
        @compactor = compactor
        @system_prompt = system_prompt
        @tools = tools
      end

      # Forward the call, then compact the (possibly grown) context.
      # Always returns the downstream response unchanged, even when the
      # context object in env is swapped out for a fresh one.
      def call(env)
        response = @app.call(env)

        ctx = env[:context]
        messages = ctx.messages.to_a.compact
        # Best-effort: some contexts may not expose #usage; treat as unknown.
        usage = ctx.usage rescue nil

        if @compactor.should_compact?(messages, usage: usage)
          result = @compactor.compact(messages)
          if result
            # NOTE(review): Compactor#compact returns [summary, recent_messages]
            # so the retention window can be kept, but rebuild_context! below
            # only replays the summary — the `_recent` messages are discarded.
            # Confirm whether dropping them is intentional.
            summary_text, _recent = result
            rebuild_context!(env, summary_text)
            env[:metadata][:compaction] = {
              messages_before: messages.size,
              # Time#iso8601 comes from the "time" stdlib — presumably
              # required elsewhere in the gem; verify.
              timestamp: Time.now.iso8601,
            }
          end
        end

        response
      end

      private

      # Replace env[:context] with a fresh LLM::Context seeded by the system
      # prompt and the compaction summary (sent as a user message).
      def rebuild_context!(env, summary_text)
        provider = env[:provider]
        new_ctx = LLM::Context.new(provider, tools: @tools)
        prompt = new_ctx.prompt do |p|
          p.system @system_prompt
          p.user "[Previous conversation summary]\n\n#{summary_text}"
        end
        # Send the seed prompt so the new context holds the summary turn.
        new_ctx.talk(prompt)
        env[:context] = new_ctx
      end
    end
  end
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Brute
  module Middleware
    # Watches for the agent repeating the same tool-call pattern and, when
    # found, injects a corrective warning before the next LLM call.
    #
    # Runs PRE-call: scans the conversation history via DoomLoopDetector.
    # On detection, the warning is talked into the context so the LLM sees
    # it as input alongside the normal tool results.
    class DoomLoopDetection < Base
      def initialize(app, threshold: 3)
        super(app)
        @detector = Brute::DoomLoopDetector.new(threshold: threshold)
      end

      def call(env)
        context = env[:context]
        repetitions = @detector.detect(context.messages.to_a)

        if repetitions
          # Delivered as a user message so the model actually reads it.
          context.talk(@detector.warning_message(repetitions))
          env[:metadata][:doom_loop_detected] = repetitions
        end

        @app.call(env)
      end
    end
  end
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Brute
  module Middleware
    # The terminal "app" of the pipeline — performs the actual LLM call.
    #
    # Streaming runs deliver content incrementally through AgentStream, so
    # the on_content callback fires post-hoc only for non-streaming runs.
    class LLMCall
      # Send env[:input] into the context and return the provider response.
      def call(env)
        response = env[:context].talk(env[:input])
        emit_content(env, response) unless env[:streaming]
        response
      end

      private

      # Fire the on_content callback once with the full response text
      # (skipped when there is no callback, no response, or no text).
      def emit_content(env, response)
        callback = env.dig(:callbacks, :on_content)
        return unless callback && response

        text = response.respond_to?(:content) ? response.content : nil
        callback.call(text) if text
      end
    end
  end
end
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Brute
  module Middleware
    # Injects provider-specific reasoning/thinking parameters before each
    # LLM call and records which model produced each response.
    #
    # PRE-call: when reasoning is enabled, writes params into env[:params]
    # (Anthropic extended-thinking config when a token budget is set;
    # OpenAI reasoning_effort mapped from the configured effort level).
    #
    # POST-call: appends the active model id to an internal list so a
    # future model switch can tell which messages came from another model.
    #
    # llm.rb exposes response.reasoning_content / response.reasoning_tokens
    # and passes provider params through, which is what this relies on.
    #
    # NOTE(review): stripping stale reasoning_content on model switch is
    # described in the gem docs but not implemented here — only the
    # per-message model bookkeeping exists. Confirm before relying on it.
    class ReasoningNormalizer < Base
      # Effort levels mapped to provider-specific values.
      # Mirrors forgecode's Effort enum.
      EFFORT_LEVELS = {
        none: "none",
        minimal: "low",
        low: "low",
        medium: "medium",
        high: "high",
        xhigh: "high",
        max: "high",
      }.freeze

      def initialize(app, model_id: nil, effort: :medium, enabled: true, budget_tokens: nil)
        super(app)
        @model_id = model_id
        @effort = effort
        @enabled = enabled
        @budget_tokens = budget_tokens
        @message_models = [] # model id recorded per assistant response
      end

      # Update the active model (e.g., user switches models mid-session).
      attr_writer :model_id

      def call(env)
        inject_reasoning_params!(env) if @enabled

        response = @app.call(env)

        # POST: remember which model produced this response.
        @message_models << @model_id if response

        response
      end

      private

      # Write provider-appropriate reasoning params into env[:params].
      def inject_reasoning_params!(env)
        params = (env[:params] ||= {})

        case provider_type(env[:provider])
        when :anthropic
          # Older extended-thinking API (claude-3.7-sonnet style) needs an
          # explicit budget; newer effort-based models need no extra params.
          params[:thinking] = {type: "enabled", budget_tokens: @budget_tokens} if @budget_tokens
        when :openai
          params[:reasoning_effort] = EFFORT_LEVELS.fetch(@effort, "medium")
        end
      end

      # Best-effort provider classification from the provider's class name.
      def provider_type(provider)
        label = provider.class.name.to_s.downcase
        case label
        when /anthropic/ then :anthropic
        when /openai/ then :openai
        when /google|gemini/ then :google
        else :unknown
        end
      end
    end
  end
end
|