brute 1.0.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/brute/agent.rb +72 -6
- data/lib/brute/events/handler.rb +69 -0
- data/lib/brute/events/prefixed_terminal_output.rb +72 -0
- data/lib/brute/events/terminal_output_handler.rb +68 -0
- data/lib/brute/middleware/001_otel_span.rb +77 -0
- data/lib/brute/middleware/003_tool_result_loop.rb +103 -0
- data/lib/brute/middleware/004_summarize.rb +139 -0
- data/lib/brute/middleware/005_tracing.rb +86 -0
- data/lib/brute/middleware/010_max_iterations.rb +73 -0
- data/lib/brute/middleware/015_otel_token_usage.rb +42 -0
- data/lib/brute/middleware/020_system_prompt.rb +128 -0
- data/lib/brute/middleware/040_compaction_check.rb +155 -0
- data/lib/brute/middleware/060_questions.rb +41 -0
- data/lib/brute/middleware/070_tool_call.rb +247 -0
- data/lib/brute/middleware/073_otel_tool_call.rb +49 -0
- data/lib/brute/middleware/075_otel_tool_results.rb +46 -0
- data/lib/brute/middleware/100_llm_call.rb +62 -0
- data/lib/brute/middleware/event_handler.rb +25 -0
- data/lib/brute/middleware/user_queue.rb +35 -0
- data/lib/brute/pipeline.rb +44 -107
- data/lib/brute/prompts/skills.rb +2 -2
- data/lib/brute/prompts.rb +23 -23
- data/lib/brute/providers/shell.rb +6 -19
- data/lib/brute/providers/shell_response.rb +22 -30
- data/lib/brute/session.rb +52 -0
- data/lib/brute/store/snapshot_store.rb +21 -37
- data/lib/brute/sub_agent.rb +106 -0
- data/lib/brute/system_prompt.rb +1 -83
- data/lib/brute/tool.rb +107 -0
- data/lib/brute/tools/delegate.rb +61 -70
- data/lib/brute/tools/fs_patch.rb +9 -7
- data/lib/brute/tools/fs_read.rb +233 -20
- data/lib/brute/tools/fs_remove.rb +8 -9
- data/lib/brute/tools/fs_search.rb +98 -16
- data/lib/brute/tools/fs_undo.rb +8 -8
- data/lib/brute/tools/fs_write.rb +7 -5
- data/lib/brute/tools/net_fetch.rb +8 -8
- data/lib/brute/tools/question.rb +36 -24
- data/lib/brute/tools/shell.rb +74 -16
- data/lib/brute/tools/todo_read.rb +8 -8
- data/lib/brute/tools/todo_write.rb +25 -18
- data/lib/brute/tools.rb +8 -12
- data/lib/brute/truncation.rb +219 -0
- data/lib/brute/version.rb +1 -1
- data/lib/brute.rb +82 -45
- metadata +59 -46
- data/lib/brute/loop/agent_stream.rb +0 -118
- data/lib/brute/loop/agent_turn.rb +0 -520
- data/lib/brute/loop/compactor.rb +0 -107
- data/lib/brute/loop/doom_loop.rb +0 -86
- data/lib/brute/loop/step.rb +0 -332
- data/lib/brute/loop/tool_call_step.rb +0 -90
- data/lib/brute/middleware/base.rb +0 -27
- data/lib/brute/middleware/compaction_check.rb +0 -106
- data/lib/brute/middleware/doom_loop_detection.rb +0 -136
- data/lib/brute/middleware/llm_call.rb +0 -128
- data/lib/brute/middleware/message_tracking.rb +0 -339
- data/lib/brute/middleware/otel/span.rb +0 -105
- data/lib/brute/middleware/otel/token_usage.rb +0 -68
- data/lib/brute/middleware/otel/tool_calls.rb +0 -68
- data/lib/brute/middleware/otel/tool_results.rb +0 -65
- data/lib/brute/middleware/otel.rb +0 -34
- data/lib/brute/middleware/reasoning_normalizer.rb +0 -192
- data/lib/brute/middleware/retry.rb +0 -157
- data/lib/brute/middleware/session_persistence.rb +0 -72
- data/lib/brute/middleware/token_tracking.rb +0 -124
- data/lib/brute/middleware/tool_error_tracking.rb +0 -179
- data/lib/brute/middleware/tool_use_guard.rb +0 -133
- data/lib/brute/middleware/tracing.rb +0 -124
- data/lib/brute/middleware.rb +0 -18
- data/lib/brute/orchestrator/turn.rb +0 -105
- data/lib/brute/patches/anthropic_tool_role.rb +0 -35
- data/lib/brute/patches/buffer_nil_guard.rb +0 -26
- data/lib/brute/providers/models_dev.rb +0 -111
- data/lib/brute/providers/ollama.rb +0 -135
- data/lib/brute/providers/opencode_go.rb +0 -43
- data/lib/brute/providers/opencode_zen.rb +0 -87
- data/lib/brute/providers.rb +0 -62
- data/lib/brute/queue/base_queue.rb +0 -222
- data/lib/brute/queue/parallel_queue.rb +0 -66
- data/lib/brute/queue/sequential_queue.rb +0 -63
- data/lib/brute/store/message_store.rb +0 -362
- data/lib/brute/store/session.rb +0 -106
- /data/lib/brute/{diff.rb → utils/diff.rb} +0 -0
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "bundler/setup"
|
|
4
|
-
require "brute"
|
|
5
|
-
|
|
6
|
-
module Brute
|
|
7
|
-
module Middleware
|
|
8
|
-
module OTel
|
|
9
|
-
# Records tool calls the LLM requested as span events.
|
|
10
|
-
#
|
|
11
|
-
# Runs POST-call: after the LLM responds, inspects ctx.functions
|
|
12
|
-
# for any tool calls the model wants to make, and adds a span event
|
|
13
|
-
# for each one with the tool name, call ID, and arguments.
|
|
14
|
-
#
|
|
15
|
-
class ToolCalls < Base
|
|
16
|
-
def call(env)
|
|
17
|
-
response = @app.call(env)
|
|
18
|
-
|
|
19
|
-
span = env[:span]
|
|
20
|
-
if span
|
|
21
|
-
functions = env[:pending_functions]
|
|
22
|
-
if functions && !functions.empty?
|
|
23
|
-
span.set_attribute("brute.tool_calls.count", functions.size)
|
|
24
|
-
|
|
25
|
-
functions.each do |fn|
|
|
26
|
-
attrs = {
|
|
27
|
-
"tool.name" => fn.name.to_s,
|
|
28
|
-
"tool.id" => fn.id.to_s,
|
|
29
|
-
}
|
|
30
|
-
args = fn.arguments
|
|
31
|
-
attrs["tool.arguments"] = args.to_json if args
|
|
32
|
-
span.add_event("tool_call", attributes: attrs)
|
|
33
|
-
end
|
|
34
|
-
end
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
response
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
end
|
|
41
|
-
end
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
test do
|
|
45
|
-
require_relative "../../../../spec/support/mock_provider"
|
|
46
|
-
require_relative "../../../../spec/support/mock_response"
|
|
47
|
-
|
|
48
|
-
def build_env(**overrides)
|
|
49
|
-
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
50
|
-
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
51
|
-
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
it "passes the response through unchanged" do
|
|
55
|
-
response = MockResponse.new(content: "here's my plan")
|
|
56
|
-
middleware = Brute::Middleware::OTel::ToolCalls.new(->(_env) { response })
|
|
57
|
-
result = middleware.call(build_env)
|
|
58
|
-
result.should == response
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
it "passes through without error when span is nil with pending functions" do
|
|
62
|
-
response = MockResponse.new(content: "here's my plan")
|
|
63
|
-
fn = Struct.new(:name, :id, :arguments, keyword_init: true).new(name: "fs_read", id: "tc_001", arguments: { "path" => "/tmp" })
|
|
64
|
-
middleware = Brute::Middleware::OTel::ToolCalls.new(->(_env) { response })
|
|
65
|
-
result = middleware.call(build_env(pending_functions: [fn]))
|
|
66
|
-
result.should == response
|
|
67
|
-
end
|
|
68
|
-
end
|
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "bundler/setup"
|
|
4
|
-
require "brute"
|
|
5
|
-
|
|
6
|
-
module Brute
|
|
7
|
-
module Middleware
|
|
8
|
-
module OTel
|
|
9
|
-
# Records tool results being sent back to the LLM as span events.
|
|
10
|
-
#
|
|
11
|
-
# Runs PRE-call: when env[:tool_results] is present, the agent loop
|
|
12
|
-
# is sending tool execution results back to the LLM. Each result gets
|
|
13
|
-
# a span event with the tool name and success/error status.
|
|
14
|
-
#
|
|
15
|
-
class ToolResults < Base
|
|
16
|
-
def call(env)
|
|
17
|
-
span = env[:span]
|
|
18
|
-
|
|
19
|
-
if span && (results = env[:tool_results])
|
|
20
|
-
span.set_attribute("brute.tool_results.count", results.size)
|
|
21
|
-
|
|
22
|
-
results.each do |name, value|
|
|
23
|
-
error = value.is_a?(Hash) && value[:error]
|
|
24
|
-
attrs = { "tool.name" => name.to_s }
|
|
25
|
-
if error
|
|
26
|
-
attrs["tool.status"] = "error"
|
|
27
|
-
attrs["tool.error"] = value[:error].to_s
|
|
28
|
-
else
|
|
29
|
-
attrs["tool.status"] = "ok"
|
|
30
|
-
end
|
|
31
|
-
span.add_event("tool_result", attributes: attrs)
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
@app.call(env)
|
|
36
|
-
end
|
|
37
|
-
end
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
test do
|
|
43
|
-
require_relative "../../../../spec/support/mock_provider"
|
|
44
|
-
require_relative "../../../../spec/support/mock_response"
|
|
45
|
-
|
|
46
|
-
def build_env(**overrides)
|
|
47
|
-
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
48
|
-
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
49
|
-
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
it "passes the response through unchanged" do
|
|
53
|
-
response = MockResponse.new(content: "processed")
|
|
54
|
-
middleware = Brute::Middleware::OTel::ToolResults.new(->(_env) { response })
|
|
55
|
-
result = middleware.call(build_env)
|
|
56
|
-
result.should == response
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
it "passes through without error when span is nil" do
|
|
60
|
-
response = MockResponse.new(content: "processed")
|
|
61
|
-
middleware = Brute::Middleware::OTel::ToolResults.new(->(_env) { response })
|
|
62
|
-
result = middleware.call(build_env(tool_results: [["fs_read", { content: "data" }]]))
|
|
63
|
-
result.should == response
|
|
64
|
-
end
|
|
65
|
-
end
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
if __FILE__ == $0
|
|
4
|
-
require "bundler/setup"
|
|
5
|
-
require "brute"
|
|
6
|
-
end
|
|
7
|
-
|
|
8
|
-
module Brute
|
|
9
|
-
module Middleware
|
|
10
|
-
# OpenTelemetry instrumentation for the LLM pipeline.
|
|
11
|
-
#
|
|
12
|
-
# Each middleware is independent and communicates through env[:span].
|
|
13
|
-
# OTel::Span must be outermost — it creates the span. The rest
|
|
14
|
-
# decorate it with events and attributes from their position in the
|
|
15
|
-
# pipeline.
|
|
16
|
-
#
|
|
17
|
-
# All middlewares are no-ops when opentelemetry-sdk is not loaded.
|
|
18
|
-
#
|
|
19
|
-
# Usage in pipeline:
|
|
20
|
-
#
|
|
21
|
-
# use Brute::Middleware::OTel::Span
|
|
22
|
-
# use Brute::Middleware::OTel::ToolResults
|
|
23
|
-
# use Brute::Middleware::OTel::ToolCalls
|
|
24
|
-
# use Brute::Middleware::OTel::TokenUsage
|
|
25
|
-
#
|
|
26
|
-
module OTel
|
|
27
|
-
end
|
|
28
|
-
end
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
require_relative "otel/span"
|
|
32
|
-
require_relative "otel/tool_results"
|
|
33
|
-
require_relative "otel/tool_calls"
|
|
34
|
-
require_relative "otel/token_usage"
|
|
@@ -1,192 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "bundler/setup"
|
|
4
|
-
require "brute"
|
|
5
|
-
|
|
6
|
-
module Brute
|
|
7
|
-
module Middleware
|
|
8
|
-
# Handles reasoning/thinking content across model switches.
|
|
9
|
-
#
|
|
10
|
-
# PRE-call:
|
|
11
|
-
# - If reasoning is enabled, injects provider-specific params into
|
|
12
|
-
# the env (e.g., Anthropic thinking config, OpenAI reasoning_effort).
|
|
13
|
-
# - Tracks which model produced each message. When the model changes,
|
|
14
|
-
# strips reasoning_content from messages produced by the old model
|
|
15
|
-
# (signatures are model-specific and cryptographically tied).
|
|
16
|
-
#
|
|
17
|
-
# POST-call:
|
|
18
|
-
# - Records the current model on the response for future normalization.
|
|
19
|
-
#
|
|
20
|
-
# llm.rb exposes:
|
|
21
|
-
# - response.reasoning_content — the thinking text
|
|
22
|
-
# - response.reasoning_tokens — token count
|
|
23
|
-
# - Provider params pass-through — we can send thinking:, reasoning_effort:, etc.
|
|
24
|
-
#
|
|
25
|
-
class ReasoningNormalizer < Base
|
|
26
|
-
# Effort levels that map to provider-specific params.
|
|
27
|
-
# Mirrors forgecode's Effort enum.
|
|
28
|
-
EFFORT_LEVELS = {
|
|
29
|
-
none: "none",
|
|
30
|
-
minimal: "low",
|
|
31
|
-
low: "low",
|
|
32
|
-
medium: "medium",
|
|
33
|
-
high: "high",
|
|
34
|
-
xhigh: "high",
|
|
35
|
-
max: "high",
|
|
36
|
-
}.freeze
|
|
37
|
-
|
|
38
|
-
def initialize(app, model_id: nil, effort: :medium, enabled: true, budget_tokens: nil)
|
|
39
|
-
super(app)
|
|
40
|
-
@model_id = model_id
|
|
41
|
-
@effort = effort
|
|
42
|
-
@enabled = enabled
|
|
43
|
-
@budget_tokens = budget_tokens
|
|
44
|
-
@message_models = [] # tracks which model produced each assistant message
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
def call(env)
|
|
48
|
-
if @enabled
|
|
49
|
-
inject_reasoning_params!(env)
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
response = @app.call(env)
|
|
53
|
-
|
|
54
|
-
# POST: record which model produced this response
|
|
55
|
-
if response
|
|
56
|
-
@message_models << @model_id
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
response
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
# Update the active model (e.g., when user switches models mid-session).
|
|
63
|
-
def model_id=(new_model)
|
|
64
|
-
@model_id = new_model
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
private
|
|
68
|
-
|
|
69
|
-
def inject_reasoning_params!(env)
|
|
70
|
-
env[:params] ||= {}
|
|
71
|
-
provider = env[:provider]
|
|
72
|
-
|
|
73
|
-
case provider_type(provider)
|
|
74
|
-
when :anthropic
|
|
75
|
-
if @budget_tokens
|
|
76
|
-
# Older extended thinking API (claude-3.7-sonnet style)
|
|
77
|
-
env[:params][:thinking] = {type: "enabled", budget_tokens: @budget_tokens}
|
|
78
|
-
else
|
|
79
|
-
# Newer effort-based API (claude-4 style) — pass through
|
|
80
|
-
# Anthropic handles this via the model itself
|
|
81
|
-
end
|
|
82
|
-
when :openai
|
|
83
|
-
env[:params][:reasoning_effort] = EFFORT_LEVELS[@effort] || "medium"
|
|
84
|
-
end
|
|
85
|
-
end
|
|
86
|
-
|
|
87
|
-
def provider_type(provider)
|
|
88
|
-
class_name = provider.class.name.to_s.downcase
|
|
89
|
-
if class_name.include?("anthropic")
|
|
90
|
-
:anthropic
|
|
91
|
-
elsif class_name.include?("openai")
|
|
92
|
-
:openai
|
|
93
|
-
elsif class_name.include?("google") || class_name.include?("gemini")
|
|
94
|
-
:google
|
|
95
|
-
else
|
|
96
|
-
:unknown
|
|
97
|
-
end
|
|
98
|
-
end
|
|
99
|
-
end
|
|
100
|
-
end
|
|
101
|
-
end
|
|
102
|
-
|
|
103
|
-
test do
|
|
104
|
-
require_relative "../../../spec/support/mock_provider"
|
|
105
|
-
require_relative "../../../spec/support/mock_response"
|
|
106
|
-
|
|
107
|
-
def build_env(**overrides)
|
|
108
|
-
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
109
|
-
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
110
|
-
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
111
|
-
end
|
|
112
|
-
|
|
113
|
-
def make_provider(type_name)
|
|
114
|
-
klass = Class.new do
|
|
115
|
-
define_method(:name) { :mock }
|
|
116
|
-
define_method(:default_model) { "mock-model" }
|
|
117
|
-
define_method(:user_role) { :user }
|
|
118
|
-
define_method(:system_role) { :system }
|
|
119
|
-
define_method(:assistant_role) { :assistant }
|
|
120
|
-
define_method(:tool_role) { :tool }
|
|
121
|
-
define_method(:tracer) { nil }
|
|
122
|
-
define_method(:tracer=) { |*| }
|
|
123
|
-
define_method(:complete) { |*_args, **_kw| MockResponse.new(content: "ok") }
|
|
124
|
-
end
|
|
125
|
-
klass.define_method(:class) do
|
|
126
|
-
c = super()
|
|
127
|
-
name_str = "LLM::#{type_name}"
|
|
128
|
-
c.define_singleton_method(:name) { name_str }
|
|
129
|
-
c
|
|
130
|
-
end
|
|
131
|
-
klass.new
|
|
132
|
-
end
|
|
133
|
-
|
|
134
|
-
inner_app = ->(_env) { MockResponse.new(content: "reasoned response") }
|
|
135
|
-
|
|
136
|
-
it "injects thinking param for Anthropic with budget_tokens" do
|
|
137
|
-
provider = make_provider("Anthropic")
|
|
138
|
-
middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "claude-4", budget_tokens: 8000, enabled: true)
|
|
139
|
-
env = build_env(provider: provider, params: {})
|
|
140
|
-
middleware.call(env)
|
|
141
|
-
env[:params][:thinking].should == { type: "enabled", budget_tokens: 8000 }
|
|
142
|
-
end
|
|
143
|
-
|
|
144
|
-
it "does not inject thinking param for Anthropic without budget_tokens" do
|
|
145
|
-
provider = make_provider("Anthropic")
|
|
146
|
-
middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "claude-4", enabled: true)
|
|
147
|
-
env = build_env(provider: provider, params: {})
|
|
148
|
-
middleware.call(env)
|
|
149
|
-
env[:params][:thinking].should.be.nil
|
|
150
|
-
end
|
|
151
|
-
|
|
152
|
-
it "injects reasoning_effort for OpenAI" do
|
|
153
|
-
provider = make_provider("OpenAI")
|
|
154
|
-
middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "o3", effort: :high, enabled: true)
|
|
155
|
-
env = build_env(provider: provider, params: {})
|
|
156
|
-
middleware.call(env)
|
|
157
|
-
env[:params][:reasoning_effort].should == "high"
|
|
158
|
-
end
|
|
159
|
-
|
|
160
|
-
it "maps low effort correctly for OpenAI" do
|
|
161
|
-
provider = make_provider("OpenAI")
|
|
162
|
-
middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "o3", effort: :low, enabled: true)
|
|
163
|
-
env = build_env(provider: provider, params: {})
|
|
164
|
-
middleware.call(env)
|
|
165
|
-
env[:params][:reasoning_effort].should == "low"
|
|
166
|
-
end
|
|
167
|
-
|
|
168
|
-
it "does not inject params for unknown provider" do
|
|
169
|
-
provider = make_provider("Mistral")
|
|
170
|
-
middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "mistral-large", enabled: true)
|
|
171
|
-
env = build_env(provider: provider, params: {})
|
|
172
|
-
middleware.call(env)
|
|
173
|
-
env[:params].should == {}
|
|
174
|
-
end
|
|
175
|
-
|
|
176
|
-
it "does not inject params when disabled" do
|
|
177
|
-
provider = make_provider("Anthropic")
|
|
178
|
-
middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "claude-4", budget_tokens: 8000, enabled: false)
|
|
179
|
-
env = build_env(provider: provider, params: {})
|
|
180
|
-
middleware.call(env)
|
|
181
|
-
env[:params].should == {}
|
|
182
|
-
end
|
|
183
|
-
|
|
184
|
-
it "allows model_id to be updated mid-session" do
|
|
185
|
-
middleware = Brute::Middleware::ReasoningNormalizer.new(inner_app, model_id: "old", enabled: true)
|
|
186
|
-
middleware.model_id = "new"
|
|
187
|
-
provider = make_provider("OpenAI")
|
|
188
|
-
env = build_env(provider: provider, params: {})
|
|
189
|
-
middleware.call(env)
|
|
190
|
-
env[:params][:reasoning_effort].should.not.be.nil
|
|
191
|
-
end
|
|
192
|
-
end
|
|
@@ -1,157 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "bundler/setup"
|
|
4
|
-
require "brute"
|
|
5
|
-
|
|
6
|
-
module Brute
|
|
7
|
-
module Middleware
|
|
8
|
-
# Retries the inner call on transient LLM errors with exponential backoff.
|
|
9
|
-
#
|
|
10
|
-
# Catches LLM::RateLimitError and LLM::ServerError, sleeps with
|
|
11
|
-
# exponential delay, and re-calls the inner app. Non-retryable errors
|
|
12
|
-
# propagate immediately.
|
|
13
|
-
#
|
|
14
|
-
# Unlike forgecode's separate retry.rs, this middleware wraps the LLM call
|
|
15
|
-
# directly — it sees the error and retries without the agent loop knowing.
|
|
16
|
-
#
|
|
17
|
-
class Retry < Base
|
|
18
|
-
DEFAULT_MAX_ATTEMPTS = 3
|
|
19
|
-
DEFAULT_BASE_DELAY = 2 # seconds
|
|
20
|
-
|
|
21
|
-
def initialize(app, max_attempts: DEFAULT_MAX_ATTEMPTS, base_delay: DEFAULT_BASE_DELAY)
|
|
22
|
-
super(app)
|
|
23
|
-
@max_attempts = max_attempts
|
|
24
|
-
@base_delay = base_delay
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
def call(env)
|
|
28
|
-
attempts = 0
|
|
29
|
-
begin
|
|
30
|
-
@app.call(env)
|
|
31
|
-
rescue LLM::RateLimitError, LLM::ServerError => e
|
|
32
|
-
attempts += 1
|
|
33
|
-
if attempts >= @max_attempts
|
|
34
|
-
env[:metadata][:last_error] = e.message
|
|
35
|
-
raise
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
delay = @base_delay ** attempts
|
|
39
|
-
env[:metadata][:retry_attempt] = attempts
|
|
40
|
-
env[:metadata][:retry_delay] = delay
|
|
41
|
-
|
|
42
|
-
sleep(delay)
|
|
43
|
-
retry
|
|
44
|
-
end
|
|
45
|
-
end
|
|
46
|
-
end
|
|
47
|
-
end
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
test do
|
|
51
|
-
require_relative "../../../spec/support/mock_provider"
|
|
52
|
-
require_relative "../../../spec/support/mock_response"
|
|
53
|
-
|
|
54
|
-
def build_env(**overrides)
|
|
55
|
-
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
56
|
-
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
57
|
-
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
def mock_inner_app(response:)
|
|
61
|
-
calls = []
|
|
62
|
-
app = ->(env) { calls << env; response }
|
|
63
|
-
[app, calls]
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
def flaky_inner_app(error_class, fail_count:, response:)
|
|
67
|
-
attempt = 0
|
|
68
|
-
->(env) { attempt += 1; raise error_class, "transient" if attempt <= fail_count; response }
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
def no_sleep_retry(*args, **kwargs)
|
|
72
|
-
mw = Brute::Middleware::Retry.new(*args, **kwargs)
|
|
73
|
-
mw.define_singleton_method(:sleep) { |_| }
|
|
74
|
-
mw
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
it "returns the response on first successful call" do
|
|
78
|
-
response = MockResponse.new(content: "success")
|
|
79
|
-
app, calls = mock_inner_app(response: response)
|
|
80
|
-
middleware = Brute::Middleware::Retry.new(app)
|
|
81
|
-
result = middleware.call(build_env)
|
|
82
|
-
result.should == response
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
it "calls inner app exactly once on success" do
|
|
86
|
-
response = MockResponse.new(content: "success")
|
|
87
|
-
app, calls = mock_inner_app(response: response)
|
|
88
|
-
Brute::Middleware::Retry.new(app).call(build_env)
|
|
89
|
-
calls.size.should == 1
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
it "retries on LLM::RateLimitError and succeeds" do
|
|
93
|
-
response = MockResponse.new(content: "success")
|
|
94
|
-
app = flaky_inner_app(LLM::RateLimitError, fail_count: 2, response: response)
|
|
95
|
-
middleware = no_sleep_retry(app, max_attempts: 3, base_delay: 2)
|
|
96
|
-
env = build_env
|
|
97
|
-
result = middleware.call(env)
|
|
98
|
-
result.should == response
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
it "records retry_attempt in metadata after retries" do
|
|
102
|
-
response = MockResponse.new(content: "success")
|
|
103
|
-
app = flaky_inner_app(LLM::RateLimitError, fail_count: 2, response: response)
|
|
104
|
-
middleware = no_sleep_retry(app, max_attempts: 3, base_delay: 2)
|
|
105
|
-
env = build_env
|
|
106
|
-
middleware.call(env)
|
|
107
|
-
env[:metadata][:retry_attempt].should == 2
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
it "retries on LLM::ServerError and succeeds" do
|
|
111
|
-
response = MockResponse.new(content: "success")
|
|
112
|
-
app = flaky_inner_app(LLM::ServerError, fail_count: 1, response: response)
|
|
113
|
-
middleware = no_sleep_retry(app, max_attempts: 3, base_delay: 2)
|
|
114
|
-
result = middleware.call(build_env)
|
|
115
|
-
result.should == response
|
|
116
|
-
end
|
|
117
|
-
|
|
118
|
-
it "re-raises after exhausting all attempts" do
|
|
119
|
-
app = ->(_env) { raise LLM::RateLimitError, "rate limited" }
|
|
120
|
-
middleware = no_sleep_retry(app, max_attempts: 3, base_delay: 2)
|
|
121
|
-
lambda { middleware.call(build_env) }.should.raise(LLM::RateLimitError)
|
|
122
|
-
end
|
|
123
|
-
|
|
124
|
-
it "does not retry non-retryable errors" do
|
|
125
|
-
call_count = 0
|
|
126
|
-
app = ->(_env) { call_count += 1; raise ArgumentError, "bad input" }
|
|
127
|
-
middleware = Brute::Middleware::Retry.new(app)
|
|
128
|
-
lambda { middleware.call(build_env) }.should.raise(ArgumentError)
|
|
129
|
-
end
|
|
130
|
-
|
|
131
|
-
it "only calls inner app once for non-retryable errors" do
|
|
132
|
-
call_count = 0
|
|
133
|
-
app = ->(_env) { call_count += 1; raise ArgumentError, "bad input" }
|
|
134
|
-
middleware = Brute::Middleware::Retry.new(app)
|
|
135
|
-
begin; middleware.call(build_env); rescue ArgumentError; end
|
|
136
|
-
call_count.should == 1
|
|
137
|
-
end
|
|
138
|
-
|
|
139
|
-
it "records retry_delay in metadata" do
|
|
140
|
-
response = MockResponse.new(content: "success")
|
|
141
|
-
app = flaky_inner_app(LLM::RateLimitError, fail_count: 1, response: response)
|
|
142
|
-
middleware = no_sleep_retry(app, max_attempts: 3, base_delay: 3)
|
|
143
|
-
env = build_env
|
|
144
|
-
middleware.call(env)
|
|
145
|
-
env[:metadata][:retry_delay].should == 3
|
|
146
|
-
end
|
|
147
|
-
|
|
148
|
-
it "tracks sleep delays for exponential backoff" do
|
|
149
|
-
response = MockResponse.new(content: "success")
|
|
150
|
-
app = flaky_inner_app(LLM::RateLimitError, fail_count: 2, response: response)
|
|
151
|
-
delays = []
|
|
152
|
-
mw = Brute::Middleware::Retry.new(app, max_attempts: 3, base_delay: 2)
|
|
153
|
-
mw.define_singleton_method(:sleep) { |d| delays << d }
|
|
154
|
-
mw.call(build_env)
|
|
155
|
-
delays.should == [2, 4]
|
|
156
|
-
end
|
|
157
|
-
end
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "bundler/setup"
|
|
4
|
-
require "brute"
|
|
5
|
-
|
|
6
|
-
module Brute
|
|
7
|
-
module Middleware
|
|
8
|
-
# Saves the conversation to disk after each LLM call.
|
|
9
|
-
#
|
|
10
|
-
# Runs POST-call: serializes env[:messages] via Session#save_messages.
|
|
11
|
-
# Failures are non-fatal — a broken session save should never crash
|
|
12
|
-
# the agent loop.
|
|
13
|
-
#
|
|
14
|
-
class SessionPersistence < Base
|
|
15
|
-
def initialize(app, session:)
|
|
16
|
-
super(app)
|
|
17
|
-
@session = session
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
def call(env)
|
|
21
|
-
response = @app.call(env)
|
|
22
|
-
|
|
23
|
-
begin
|
|
24
|
-
@session.save_messages(env[:messages])
|
|
25
|
-
rescue => e
|
|
26
|
-
warn "[brute] Session save failed: #{e.message}"
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
response
|
|
30
|
-
end
|
|
31
|
-
end
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
test do
|
|
36
|
-
require_relative "../../../spec/support/mock_provider"
|
|
37
|
-
require_relative "../../../spec/support/mock_response"
|
|
38
|
-
|
|
39
|
-
def build_env(**overrides)
|
|
40
|
-
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
41
|
-
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
42
|
-
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
it "passes the response through unchanged" do
|
|
46
|
-
response = MockResponse.new(content: "saved response")
|
|
47
|
-
session = Struct.new(:saved) { def save_messages(m); self.saved = m; end }.new
|
|
48
|
-
inner_app = ->(_env) { response }
|
|
49
|
-
middleware = Brute::Middleware::SessionPersistence.new(inner_app, session: session)
|
|
50
|
-
result = middleware.call(build_env)
|
|
51
|
-
result.should == response
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
it "calls session.save_messages with env messages" do
|
|
55
|
-
response = MockResponse.new(content: "saved response")
|
|
56
|
-
session = Struct.new(:saved) { def save_messages(m); self.saved = m; end }.new
|
|
57
|
-
inner_app = ->(_env) { response }
|
|
58
|
-
middleware = Brute::Middleware::SessionPersistence.new(inner_app, session: session)
|
|
59
|
-
messages = [LLM::Message.new(:user, "hello")]
|
|
60
|
-
middleware.call(build_env(messages: messages))
|
|
61
|
-
session.saved.should == messages
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
it "does not propagate session save failures" do
|
|
65
|
-
response = MockResponse.new(content: "saved response")
|
|
66
|
-
session = Object.new
|
|
67
|
-
session.define_singleton_method(:save_messages) { |_| raise RuntimeError, "disk full" }
|
|
68
|
-
inner_app = ->(_env) { response }
|
|
69
|
-
middleware = Brute::Middleware::SessionPersistence.new(inner_app, session: session)
|
|
70
|
-
lambda { middleware.call(build_env) }.should.not.raise
|
|
71
|
-
end
|
|
72
|
-
end
|