brute 1.0.1 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/brute/agent.rb +74 -6
- data/lib/brute/events/handler.rb +69 -0
- data/lib/brute/events/prefixed_terminal_output.rb +72 -0
- data/lib/brute/events/terminal_output_handler.rb +68 -0
- data/lib/brute/middleware/001_otel_span.rb +77 -0
- data/lib/brute/middleware/003_tool_result_loop.rb +103 -0
- data/lib/brute/middleware/004_summarize.rb +139 -0
- data/lib/brute/middleware/005_tracing.rb +94 -0
- data/lib/brute/middleware/010_max_iterations.rb +73 -0
- data/lib/brute/middleware/015_otel_token_usage.rb +42 -0
- data/lib/brute/middleware/020_system_prompt.rb +128 -0
- data/lib/brute/middleware/040_compaction_check.rb +155 -0
- data/lib/brute/middleware/060_questions.rb +41 -0
- data/lib/brute/middleware/070_tool_call.rb +247 -0
- data/lib/brute/middleware/073_otel_tool_call.rb +49 -0
- data/lib/brute/middleware/075_otel_tool_results.rb +46 -0
- data/lib/brute/middleware/100_llm_call.rb +62 -0
- data/lib/brute/middleware/event_handler.rb +25 -0
- data/lib/brute/middleware/user_queue.rb +35 -0
- data/lib/brute/pipeline.rb +44 -107
- data/lib/brute/prompts/skills.rb +2 -2
- data/lib/brute/prompts.rb +23 -23
- data/lib/brute/providers/shell.rb +6 -19
- data/lib/brute/providers/shell_response.rb +22 -30
- data/lib/brute/session.rb +52 -0
- data/lib/brute/store/snapshot_store.rb +21 -37
- data/lib/brute/sub_agent.rb +106 -0
- data/lib/brute/system_prompt.rb +1 -83
- data/lib/brute/tool.rb +107 -0
- data/lib/brute/tools/delegate.rb +61 -70
- data/lib/brute/tools/fs_patch.rb +9 -7
- data/lib/brute/tools/fs_read.rb +233 -20
- data/lib/brute/tools/fs_remove.rb +8 -9
- data/lib/brute/tools/fs_search.rb +98 -16
- data/lib/brute/tools/fs_undo.rb +8 -8
- data/lib/brute/tools/fs_write.rb +7 -5
- data/lib/brute/tools/net_fetch.rb +8 -8
- data/lib/brute/tools/question.rb +36 -24
- data/lib/brute/tools/shell.rb +74 -16
- data/lib/brute/tools/todo_read.rb +8 -8
- data/lib/brute/tools/todo_write.rb +25 -18
- data/lib/brute/tools.rb +8 -12
- data/lib/brute/truncation.rb +219 -0
- data/lib/brute/version.rb +1 -1
- data/lib/brute.rb +82 -45
- metadata +59 -46
- data/lib/brute/loop/agent_stream.rb +0 -118
- data/lib/brute/loop/agent_turn.rb +0 -520
- data/lib/brute/loop/compactor.rb +0 -107
- data/lib/brute/loop/doom_loop.rb +0 -86
- data/lib/brute/loop/step.rb +0 -332
- data/lib/brute/loop/tool_call_step.rb +0 -90
- data/lib/brute/middleware/base.rb +0 -27
- data/lib/brute/middleware/compaction_check.rb +0 -106
- data/lib/brute/middleware/doom_loop_detection.rb +0 -136
- data/lib/brute/middleware/llm_call.rb +0 -128
- data/lib/brute/middleware/message_tracking.rb +0 -339
- data/lib/brute/middleware/otel/span.rb +0 -105
- data/lib/brute/middleware/otel/token_usage.rb +0 -68
- data/lib/brute/middleware/otel/tool_calls.rb +0 -68
- data/lib/brute/middleware/otel/tool_results.rb +0 -65
- data/lib/brute/middleware/otel.rb +0 -34
- data/lib/brute/middleware/reasoning_normalizer.rb +0 -192
- data/lib/brute/middleware/retry.rb +0 -157
- data/lib/brute/middleware/session_persistence.rb +0 -72
- data/lib/brute/middleware/token_tracking.rb +0 -124
- data/lib/brute/middleware/tool_error_tracking.rb +0 -179
- data/lib/brute/middleware/tool_use_guard.rb +0 -133
- data/lib/brute/middleware/tracing.rb +0 -124
- data/lib/brute/middleware.rb +0 -18
- data/lib/brute/orchestrator/turn.rb +0 -105
- data/lib/brute/patches/anthropic_tool_role.rb +0 -35
- data/lib/brute/patches/buffer_nil_guard.rb +0 -26
- data/lib/brute/providers/models_dev.rb +0 -111
- data/lib/brute/providers/ollama.rb +0 -135
- data/lib/brute/providers/opencode_go.rb +0 -43
- data/lib/brute/providers/opencode_zen.rb +0 -87
- data/lib/brute/providers.rb +0 -62
- data/lib/brute/queue/base_queue.rb +0 -222
- data/lib/brute/queue/parallel_queue.rb +0 -66
- data/lib/brute/queue/sequential_queue.rb +0 -63
- data/lib/brute/store/message_store.rb +0 -362
- data/lib/brute/store/session.rb +0 -106
- /data/lib/brute/{diff.rb → utils/diff.rb} +0 -0
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
6
|
+
module Brute
|
|
7
|
+
module Middleware
|
|
8
|
+
# Logs timing and token usage for every LLM call, and tracks cumulative
# timing data in env[:metadata][:timing].
#
# As the outermost middleware, it sees the full pipeline elapsed time per
# call. It also tracks total wall-clock time across all calls in a turn
# (including tool execution gaps between LLM calls).
#
# A new turn is detected when env[:current_iteration] is 1 or lower (the
# agent loop resets this at the start of each turn). A missing counter is
# treated as the start of a turn instead of raising.
#
# Stores in env[:metadata][:timing]:
#   total_elapsed:      wall-clock since the turn began (includes tool gaps)
#   total_llm_elapsed:  cumulative time spent inside LLM calls this turn
#   llm_call_count:     number of calls seen by this instance (never reset)
#   last_call_elapsed:  duration of the most recent LLM call
#
# Stores in env[:metadata][:tokens], only when the response exposes usage:
#   total, total_input, total_output
#
class Tracing
  # @param app [#call] next middleware in the stack
  # @param logger [#debug] receives one debug line before and one after each call
  def initialize(app, logger:)
    @app = app

    @logger = logger
    @call_count = 0
    @total_llm_elapsed = 0.0
    @turn_start = nil
  end

  # Times the downstream call, logs it, and records timing/token metadata.
  #
  # @param env [Hash] pipeline environment; reads :current_iteration,
  #   :messages, :provider and :model, appends to :events when present, and
  #   writes :metadata (created if absent)
  # @return [Object] whatever the downstream app returned, unchanged
  def call(env)
    @call_count += 1

    # nil.to_i is 0, so an absent counter starts a turn rather than raising.
    start_turn if env[:current_iteration].to_i <= 1

    label = "[#{provider_name(env)}/#{model_name(env)}]"
    @logger.debug("[brute] LLM call ##{@call_count} #{label} (#{env[:messages].size} messages in context)")

    started = monotonic_now
    response = @app.call(env)
    finished = monotonic_now
    elapsed = finished - started

    @total_llm_elapsed += elapsed

    # Read the usage object exactly once; it feeds both the log line and
    # the token metadata below.
    usage = response.usage if response.respond_to?(:usage)
    tokens = usage ? read_token(usage, :total_tokens) : "?"

    @logger.debug("[brute] LLM response ##{@call_count} #{label}: #{tokens} tokens, #{elapsed.round(2)}s")

    # Tracing is an observer: a missing event sink must not discard a
    # response that was already produced.
    events = env[:events]
    if events && response
      events << { type: :log, data: "LLM response ##{@call_count}: #{tokens} tokens, #{elapsed.round(2)}s" }
    end

    metadata = (env[:metadata] ||= {})
    metadata[:timing] = {
      total_elapsed: finished - (@turn_start || started),
      total_llm_elapsed: @total_llm_elapsed,
      llm_call_count: @call_count,
      last_call_elapsed: elapsed
    }

    if usage
      metadata[:tokens] = {
        total: tokens,
        total_input: read_token(usage, :input_tokens),
        total_output: read_token(usage, :output_tokens)
      }
    end

    response
  end

  private

  # Current reading of the monotonic clock, in seconds.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # Marks the beginning of a turn and clears the per-turn LLM time.
  def start_turn
    @turn_start = monotonic_now
    @total_llm_elapsed = 0.0
  end

  # Provider label for log lines: its #name when available, else its class name.
  def provider_name(env)
    provider = env[:provider]
    provider.respond_to?(:name) ? provider.name : provider.class.name
  end

  # Model label for log lines. Falls back to the provider's default model,
  # and to "unknown" when that lookup fails for any reason (best effort:
  # logging must not break the pipeline).
  def model_name(env)
    env[:model] || env[:provider].default_model
  rescue StandardError
    "unknown"
  end

  # Reads one token counter from a usage object, via a reader method or
  # via #[] with a symbol or string key. Returns 0 when neither works.
  def read_token(usage, method)
    if usage.respond_to?(method)
      usage.send(method).to_i
    elsif usage.respond_to?(:[])
      (usage[method] || usage[method.to_s]).to_i
    else
      0
    end
  end
end
|
|
93
|
+
end
|
|
94
|
+
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
6
|
+
module Brute
|
|
7
|
+
module Middleware
|
|
8
|
+
# Caps the number of agent-loop iterations so a runaway tool loop cannot
# keep going indefinitely.
#
# Once env[:current_iteration] exceeds the configured maximum, the rest of
# the stack is skipped and a user message announcing the limit is appended
# to env[:messages]. The intent is that ToolResultLoop then leaves its loop
# naturally, because the last message is no longer a :tool message.
#
class MaxIterations

  DEFAULT_MAX_ITERATIONS = 100

  # @param app [#call] next middleware in the stack
  # @param max_iterations [Integer] highest iteration number still allowed to run
  def initialize(app, max_iterations: DEFAULT_MAX_ITERATIONS)
    @app = app
    @max_iterations = max_iterations
  end

  # Runs the downstream app while under the cap; otherwise appends the
  # limit notice and returns the result of that append.
  def call(env)
    return @app.call(env) unless limit_exceeded?(env[:current_iteration])

    history = env[:messages]
    history << RubyLLM::Message.new(role: :user, content: "Maximum iterations reached.")
  end

  private

  # True when the given iteration number is strictly above the cap.
  def limit_exceeded?(iteration)
    iteration > @max_iterations
  end
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
test do
  require "brute/session"

  # Below the cap the downstream app must be invoked.
  it "can be added to a stack" do
    reached = false
    downstream = ->(_env) { reached = true }
    middleware = Brute::Middleware::MaxIterations.new(downstream)
    middleware.call({ current_iteration: 1, messages: Brute::Session.new })
    reached.should.be.true
  end

  # With a cap of 0, iteration 1 is already over the limit.
  it "prevents execution after given max" do
    reached = false
    downstream = ->(_env) { reached = true }
    middleware = Brute::Middleware::MaxIterations.new(downstream, max_iterations: 0)
    env = { current_iteration: 1, messages: Brute::Session.new }
    middleware.call(env)
    reached.should.be.false
  end

  # Over the limit, the notice becomes the newest message in the session.
  it "injects a user message when max is hit" do
    downstream = ->(_env) {}
    middleware = Brute::Middleware::MaxIterations.new(downstream, max_iterations: 0)
    history = Brute::Session.new
    history.user("hi")
    env = { current_iteration: 1, messages: history }
    middleware.call(env)
    env[:messages].last.role.should == :user
    env[:messages].last.content.should =~ /Maximum iterations reached/
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
6
|
+
module Brute
|
|
7
|
+
module Middleware
|
|
8
|
+
module OTel
|
|
9
|
+
# Placeholder for recording LLM token usage as span attributes.
#
# The recording logic is currently commented out, so this middleware only
# forwards the call down the stack and returns the downstream result.
#
# Disabled implementation, kept for reference. It ran POST-call: it read
# token counts from the response usage object and set them as attributes
# on env[:span]:
#
#   response = @app.call(env)
#
#   span = env[:span]
#   if span && response.respond_to?(:usage) && (usage = response.usage)
#     span.set_attribute("gen_ai.usage.input_tokens", usage.input_tokens.to_i)
#     span.set_attribute("gen_ai.usage.output_tokens", usage.output_tokens.to_i)
#     span.set_attribute("gen_ai.usage.total_tokens", usage.total_tokens.to_i)
#
#     reasoning = usage.reasoning_tokens.to_i
#     span.set_attribute("gen_ai.usage.reasoning_tokens", reasoning) if reasoning > 0
#   end
#
#   response
#
class TokenUsage
  # @param app [#call] next middleware in the stack
  def initialize(app)
    @app = app
  end

  # Pass-through: delegates to the next middleware and returns its result.
  def call(env)
    downstream_result = @app.call(env)
    downstream_result
  end
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
test do
|
|
41
|
+
# TODO: not implemented - no tests exist yet for the TokenUsage middleware.
|
|
42
|
+
end
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
6
|
+
module Brute
|
|
7
|
+
module Middleware
|
|
8
|
+
# Ensures env[:messages] starts with a system message before control is
# passed down the middleware chain.
#
# The prompt source defaults to Brute::SystemPrompt.default. Supply a
# custom Brute::SystemPrompt instance to override it, for example for a
# SubAgent that needs a specialized prompt:
#
#   use Brute::Middleware::SystemPrompt,
#     system_prompt: Brute::SystemPrompt.build { |p, _ctx|
#       p << Brute::Prompts.agent_prompt("explore")
#     }
#
# Nothing is injected when env[:messages] already contains a :system
# message (e.g. from session.system(...)), so manually-set system prompts
# are respected. Nothing is injected either when the prepared prompt is
# empty.
#
class SystemPrompt
  # @param app [#call] next middleware in the stack
  # @param system_prompt [#prepare] prompt builder; #prepare receives the context hash
  def initialize(app, system_prompt: Brute::SystemPrompt.default)
    @app = app
    @system_prompt = system_prompt
  end

  # Injects the system message if needed, then continues down the stack.
  def call(env)
    inject_prompt(env) unless env[:messages].any? { |msg| msg.role == :system }

    @app.call(env)
  end

  private

  # Prepares the prompt for this env and puts it at the front of the
  # message list, unless the prepared result is empty.
  def inject_prompt(env)
    prepared = @system_prompt.prepare(build_context(env))
    return if prepared.empty?

    system_message = RubyLLM::Message.new(role: :system, content: prepared.to_s)
    env[:messages].unshift(system_message)
  end

  # Context handed to the prompt: provider, model and working directory,
  # with env[:metadata] entries merged on top (metadata wins on key clashes).
  def build_context(env)
    defaults = {
      provider_name: env[:provider].to_s,
      model_name: env[:model].to_s,
      cwd: Dir.pwd,
    }
    defaults.merge(env.fetch(:metadata, {}))
  end
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
test do
  require "brute/session"

  # Wraps the given block (or an identity lambda) in the middleware under test.
  def build_middleware(system_prompt: Brute::SystemPrompt.default, &inner_block)
    downstream = inner_block || ->(env) { env }
    Brute::Middleware::SystemPrompt.new(downstream, system_prompt: system_prompt)
  end

  # Minimal env with an empty metadata hash and a fresh session by default.
  def base_env(messages: Brute::Session.new)
    { messages: messages, provider: :anthropic, model: "claude-sonnet-4-20250514", metadata: {} }
  end

  it "prepends a system message when none exists" do
    middleware = build_middleware
    env = base_env
    env[:messages].user("hi")

    middleware.call(env)

    env[:messages].first.role.should == :system
  end

  it "skips injection when a system message already exists" do
    middleware = build_middleware
    env = base_env
    env[:messages].system("custom prompt")
    env[:messages].user("hi")

    middleware.call(env)

    system_messages = env[:messages].select { |msg| msg.role == :system }
    system_messages.size.should == 1
    env[:messages].first.content.should == "custom prompt"
  end

  it "accepts a custom system_prompt" do
    custom_prompt = Brute::SystemPrompt.build { |p, _ctx| p << "You are a test agent." }
    middleware = build_middleware(system_prompt: custom_prompt)
    env = base_env
    env[:messages].user("hi")

    middleware.call(env)

    env[:messages].first.role.should == :system
    env[:messages].first.content.should == "You are a test agent."
  end

  it "merges metadata into context" do
    # The spy prompt records the context hash it is built with.
    received_ctx = nil
    recording_prompt = Brute::SystemPrompt.build do |p, ctx|
      received_ctx = ctx
      p << "ok"
    end
    middleware = build_middleware(system_prompt: recording_prompt)
    env = base_env
    env[:metadata][:agent] = "plan"
    env[:messages].user("hi")

    middleware.call(env)

    received_ctx[:agent].should == "plan"
    received_ctx[:provider_name].should == "anthropic"
  end
end
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
6
|
+
module Brute
|
|
7
|
+
module Middleware
|
|
8
|
+
# Intended hook for checking context size around each LLM call and
# triggering compaction when thresholds are exceeded.
#
# The check itself is currently commented out in #call, so this middleware
# only forwards to the next app. The nested Compactor is fully implemented
# and can be used on its own.
#
# Design notes carried over from the original author:
# - It should add a compaction event to the logs with the context token
#   total listed, so a model that supports extra context can include the
#   compaction as well as the previous messages.
# - An LLM that doesn't support it can just use the messages that come
#   after the compaction.
#
class CompactionCheck
  # @param app [#call] next middleware in the stack
  # @param compactor [Compactor, nil] stored; not used while the check is disabled
  # @param system_prompt [Object] stored; not used while the check is disabled
  # @param compactor_opts [Hash] stored; the disabled sketch passes these to Compactor.new
  def initialize(app, compactor: nil, system_prompt:, **compactor_opts)
    @app = app
    @system_prompt = system_prompt
    @compactor = compactor
    @compactor_opts = compactor_opts
  end

  # Pass-through while the compaction check is disabled.
  #
  # Disabled sketch, kept for reference:
  #
  #   @compactor ||= Compactor.new(env[:provider], **@compactor_opts)
  #
  #   messages = env[:messages]
  #   usage = env[:metadata].dig(:tokens, :last_call)
  #
  #   if @compactor.should_compact?(messages, usage: usage)
  #     result = @compactor.compact(messages)
  #     if result
  #       summary_text, _recent = result
  #       env[:metadata][:compaction] = {
  #         messages_before: messages.size,
  #         timestamp: Time.now.iso8601,
  #       }
  #       # Replace the message history with the summary
  #       env[:messages] = [
  #         RubyLLM::Message.new(role: :system, content: @system_prompt),
  #         RubyLLM::Message.new(role: :user, content: "[Previous conversation summary]\n\n#{summary_text}"),
  #       ]
  #     end
  #   end
  def call(env)
    @app.call(env)
  end

  # Context compaction service. When the conversation grows past
  # configurable thresholds, older messages are summarized into a condensed
  # form so the caller can drop the originals and keep the context window
  # manageable.
  class Compactor
    DEFAULTS = {
      token_threshold: 100_000, # Compact when usage[:total] exceeds this
      message_threshold: 200,   # Compact when message count exceeds this
      retention_window: 6,      # Number of most recent messages left unsummarized
      summary_model: nil,       # Model for summarization (nil => "claude-sonnet-4-20250514")
    }.freeze

    attr_reader :config

    # @param provider [#complete] called as complete(prompt, model:) to produce the summary
    # @param opts [Hash] overrides for DEFAULTS
    def initialize(provider, **opts)
      @provider = provider
      @config = DEFAULTS.merge(opts)
    end

    # Whether compaction should run for the current context state.
    #
    # @param messages [#size] current message history
    # @param usage [#[], nil] optional token usage; only usage[:total] is read
    # @return [Boolean]
    def should_compact?(messages, usage: nil)
      return true if messages.size > @config[:message_threshold]

      usage ? (usage[:total] || 0) > @config[:token_threshold] : false
    end

    # Splits the history into an older part, which is summarized, and the
    # most recent retention_window messages, which are returned untouched.
    #
    # @return [Array(String, Array), nil] [summary_text, recent_messages],
    #   or nil when there is nothing older than the retention window
    def compact(messages)
      total = messages.size
      keep_count = [@config[:retention_window], total].min
      split_at = total - keep_count
      return nil if split_at <= 0

      older = messages[0...split_at]
      recent = messages[split_at..]

      [summarize(older), recent]
    end

    private

    # Asks the provider for a structured summary of the given messages and
    # returns the content of its response.
    def summarize(messages)
      conversation_text = messages.map { |message| render_message(message) }.join("\n---\n")

      prompt = <<~PROMPT
        Summarize this conversation history for context continuity. The summary will replace
        these messages in the context window, so include everything the agent needs to continue
        working effectively.

        Structure your summary as:
        ## Goal
        What the user asked for.

        ## Progress
        - Files read, created, or modified (list paths)
        - Commands executed and their outcomes
        - Key decisions made

        ## Current State
        Where things stand right now — what's done and what remains.

        ## Next Steps
        What should happen next based on the conversation.

        ---
        CONVERSATION:
        #{conversation_text}
      PROMPT

      model = @config[:summary_model] || "claude-sonnet-4-20250514"
      @provider.complete(prompt, model: model).content
    end

    # One condensed line per message: role, optional tool-call info, and
    # the content cut off with the [0..1000] slice.
    def render_message(message)
      role = message.respond_to?(:role) ? message.role.to_s : "unknown"
      source = message.respond_to?(:content) ? message.content : message
      content = source.to_s[0..1000]

      # Include tool call info for messages that expose function calls
      tool_info = ""
      if message.respond_to?(:functions) && message.functions&.any?
        calls = message.functions.map { |fn| "#{fn.name}(#{fn.arguments.to_s[0..200]})" }
        tool_info = " [tools: #{calls.join(", ")}]"
      end

      "#{role}:#{tool_info} #{content}"
    end
  end
end
|
|
150
|
+
end
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
test do
|
|
154
|
+
# TODO: not implemented - no tests exist yet for CompactionCheck or its Compactor.
|
|
155
|
+
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
6
|
+
module Brute
|
|
7
|
+
module Middleware
|
|
8
|
+
# Placeholder middleware for handling "question" tool calls.
#
# The handling logic is commented out, so this class currently forwards to
# the next middleware and returns its result unchanged.
class Question
  # @param app [#call] next middleware in the stack
  def initialize(app)
    @app = app
  end

  # Calls downstream and hands back its result untouched.
  #
  # Disabled sketch, kept for reference. NOTE(review): it uses names it
  # never defines (last_message, tc, content) and would need fixing before
  # being enabled:
  #
  #   if env[:messages].last.tool_call?
  #     questions = last_message.tool_calls.select { |_id, tc| tc.name == "question" }
  #
  #     if questions.any?
  #       env[:events] << {
  #         type: :tool_call_start,
  #         data: questions.map { |_id, tc| { name: tc.name, call_id: tc.id, arguments: tc.arguments } }
  #       }
  #
  #       questions.each do |_id, question|
  #         result = question.call
  #
  #         env[:events] << { type: :tool_result, data: { name: tc.name, content: content } }
  #
  #         env[:messages] << RubyLLM::Message.new(role: :tool, content: content, tool_call_id: tc.id)
  #       end
  #     end
  #   end
  def call(env)
    downstream_result = @app.call(env)
    downstream_result
  end
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
test do
|
|
40
|
+
# TODO: not implemented - no tests exist yet for the Question middleware.
|
|
41
|
+
end
|