brute 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/brute/agent.rb +72 -6
- data/lib/brute/events/handler.rb +69 -0
- data/lib/brute/events/prefixed_terminal_output.rb +72 -0
- data/lib/brute/events/terminal_output_handler.rb +68 -0
- data/lib/brute/middleware/001_otel_span.rb +77 -0
- data/lib/brute/middleware/003_tool_result_loop.rb +103 -0
- data/lib/brute/middleware/004_summarize.rb +139 -0
- data/lib/brute/middleware/005_tracing.rb +86 -0
- data/lib/brute/middleware/010_max_iterations.rb +73 -0
- data/lib/brute/middleware/015_otel_token_usage.rb +42 -0
- data/lib/brute/middleware/020_system_prompt.rb +128 -0
- data/lib/brute/middleware/040_compaction_check.rb +155 -0
- data/lib/brute/middleware/060_questions.rb +41 -0
- data/lib/brute/middleware/070_tool_call.rb +247 -0
- data/lib/brute/middleware/073_otel_tool_call.rb +49 -0
- data/lib/brute/middleware/075_otel_tool_results.rb +46 -0
- data/lib/brute/middleware/100_llm_call.rb +62 -0
- data/lib/brute/middleware/event_handler.rb +25 -0
- data/lib/brute/middleware/user_queue.rb +35 -0
- data/lib/brute/pipeline.rb +44 -107
- data/lib/brute/prompts/skills.rb +2 -2
- data/lib/brute/prompts.rb +23 -23
- data/lib/brute/providers/shell.rb +6 -19
- data/lib/brute/providers/shell_response.rb +22 -30
- data/lib/brute/session.rb +52 -0
- data/lib/brute/store/snapshot_store.rb +21 -37
- data/lib/brute/sub_agent.rb +106 -0
- data/lib/brute/system_prompt.rb +1 -83
- data/lib/brute/tool.rb +107 -0
- data/lib/brute/tools/delegate.rb +61 -70
- data/lib/brute/tools/fs_patch.rb +9 -7
- data/lib/brute/tools/fs_read.rb +233 -20
- data/lib/brute/tools/fs_remove.rb +8 -9
- data/lib/brute/tools/fs_search.rb +98 -16
- data/lib/brute/tools/fs_undo.rb +8 -8
- data/lib/brute/tools/fs_write.rb +7 -5
- data/lib/brute/tools/net_fetch.rb +8 -8
- data/lib/brute/tools/question.rb +36 -24
- data/lib/brute/tools/shell.rb +74 -16
- data/lib/brute/tools/todo_read.rb +8 -8
- data/lib/brute/tools/todo_write.rb +25 -18
- data/lib/brute/tools.rb +8 -12
- data/lib/brute/truncation.rb +219 -0
- data/lib/brute/version.rb +1 -1
- data/lib/brute.rb +82 -45
- metadata +59 -46
- data/lib/brute/loop/agent_stream.rb +0 -118
- data/lib/brute/loop/agent_turn.rb +0 -520
- data/lib/brute/loop/compactor.rb +0 -107
- data/lib/brute/loop/doom_loop.rb +0 -86
- data/lib/brute/loop/step.rb +0 -332
- data/lib/brute/loop/tool_call_step.rb +0 -90
- data/lib/brute/middleware/base.rb +0 -27
- data/lib/brute/middleware/compaction_check.rb +0 -106
- data/lib/brute/middleware/doom_loop_detection.rb +0 -136
- data/lib/brute/middleware/llm_call.rb +0 -128
- data/lib/brute/middleware/message_tracking.rb +0 -339
- data/lib/brute/middleware/otel/span.rb +0 -105
- data/lib/brute/middleware/otel/token_usage.rb +0 -68
- data/lib/brute/middleware/otel/tool_calls.rb +0 -68
- data/lib/brute/middleware/otel/tool_results.rb +0 -65
- data/lib/brute/middleware/otel.rb +0 -34
- data/lib/brute/middleware/reasoning_normalizer.rb +0 -192
- data/lib/brute/middleware/retry.rb +0 -157
- data/lib/brute/middleware/session_persistence.rb +0 -72
- data/lib/brute/middleware/token_tracking.rb +0 -124
- data/lib/brute/middleware/tool_error_tracking.rb +0 -179
- data/lib/brute/middleware/tool_use_guard.rb +0 -133
- data/lib/brute/middleware/tracing.rb +0 -124
- data/lib/brute/middleware.rb +0 -18
- data/lib/brute/orchestrator/turn.rb +0 -105
- data/lib/brute/patches/anthropic_tool_role.rb +0 -35
- data/lib/brute/patches/buffer_nil_guard.rb +0 -26
- data/lib/brute/providers/models_dev.rb +0 -111
- data/lib/brute/providers/ollama.rb +0 -135
- data/lib/brute/providers/opencode_go.rb +0 -43
- data/lib/brute/providers/opencode_zen.rb +0 -87
- data/lib/brute/providers.rb +0 -62
- data/lib/brute/queue/base_queue.rb +0 -222
- data/lib/brute/queue/parallel_queue.rb +0 -66
- data/lib/brute/queue/sequential_queue.rb +0 -63
- data/lib/brute/store/message_store.rb +0 -362
- data/lib/brute/store/session.rb +0 -106
- /data/lib/brute/{diff.rb → utils/diff.rb} +0 -0
|
@@ -1,124 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "bundler/setup"
|
|
4
|
-
require "brute"
|
|
5
|
-
|
|
6
|
-
module Brute
  module Middleware
    # Post-call middleware that keeps running token totals for every LLM
    # response passing through this instance.
    #
    # After the inner app returns, the response's usage object (if it has
    # one) is added to the running counters and a snapshot is written to
    # env[:metadata][:tokens]. The snapshot also carries the figures for the
    # most recent call under :last_call. Responses without usage leave
    # env[:metadata] untouched.
    #
    class TokenTracking < Base
      def initialize(app)
        super(app)
        @total_input = @total_output = @total_reasoning = @call_count = 0
      end

      # Calls the inner app, records usage when present, and returns the
      # response unchanged.
      def call(env)
        @app.call(env).tap do |response|
          usage = response.usage if response.respond_to?(:usage)
          record_usage(env, usage) if usage
        end
      end

      private

      # Folds one usage object into the running totals and publishes the
      # updated snapshot in env[:metadata][:tokens].
      def record_usage(env, usage)
        input = usage.input_tokens.to_i
        output = usage.output_tokens.to_i

        @total_input += input
        @total_output += output
        @total_reasoning += usage.reasoning_tokens.to_i
        @call_count += 1

        env[:metadata][:tokens] = {
          total_input: @total_input,
          total_output: @total_output,
          total_reasoning: @total_reasoning,
          # Grand total is input + output only; reasoning is reported separately.
          total: @total_input + @total_output,
          call_count: @call_count,
          last_call: {
            input: input,
            output: output,
            total: usage.total_tokens.to_i,
          },
        }
      end
    end
  end
end
|
|
50
|
-
|
|
51
|
-
test do
  require_relative "../../../spec/support/mock_provider"
  require_relative "../../../spec/support/mock_response"

  # Baseline pipeline env; any key can be replaced through +overrides+.
  def build_env(**overrides)
    defaults = {
      provider: MockProvider.new,
      model: nil,
      input: "test prompt",
      tools: [],
      messages: [],
      stream: nil,
      params: {},
      metadata: {},
      callbacks: {},
      tool_results: nil,
      streaming: false,
      should_exit: nil,
      pending_functions: []
    }
    defaults.merge(overrides)
  end

  # A response reporting 100 input / 50 output / 10 reasoning tokens.
  def make_response
    usage = LLM::Usage.new(input_tokens: 100, output_tokens: 50, reasoning_tokens: 10, total_tokens: 160)
    MockResponse.new(content: "hello", usage: usage)
  end

  # Runs a fresh TokenTracking around +app+ for +calls+ invocations on one
  # env and returns whatever ended up in env[:metadata][:tokens].
  def tokens_after(app, calls: 1)
    middleware = Brute::Middleware::TokenTracking.new(app)
    env = build_env
    calls.times { middleware.call(env) }
    env[:metadata][:tokens]
  end

  it "passes the response through unchanged" do
    response = make_response
    middleware = Brute::Middleware::TokenTracking.new(->(_env) { response })
    middleware.call(build_env).should == response
  end

  it "populates total_input tokens" do
    tokens_after(->(_env) { make_response })[:total_input].should == 100
  end

  it "populates total_output tokens" do
    tokens_after(->(_env) { make_response })[:total_output].should == 50
  end

  it "populates total_reasoning tokens" do
    tokens_after(->(_env) { make_response })[:total_reasoning].should == 10
  end

  it "populates call_count" do
    tokens_after(->(_env) { make_response })[:call_count].should == 1
  end

  it "accumulates token counts across multiple calls" do
    tokens_after(->(_env) { make_response }, calls: 2)[:total_input].should == 200
  end

  it "handles a response without usage gracefully" do
    no_usage = Object.new
    tokens_after(->(_env) { no_usage }).should.be.nil
  end

  it "handles a response where usage returns nil" do
    nil_usage = Struct.new(:usage).new(nil)
    tokens_after(->(_env) { nil_usage }).should.be.nil
  end
end
|
|
@@ -1,179 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "bundler/setup"
|
|
4
|
-
require "brute"
|
|
5
|
-
|
|
6
|
-
module Brute
  module Middleware
    # Pre-call middleware that counts tool invocations and per-tool failures
    # seen in env[:tool_results], and raises an exit signal once any single
    # tool has failed max_failures times.
    #
    # It never runs tools itself. On every call it publishes:
    #
    #   env[:metadata][:tool_calls]               cumulative number of results seen
    #   env[:metadata][:tool_errors]              copy of the tool_name => failures map
    #   env[:metadata][:tool_error_limit_reached] true/false
    #
    # When the limit is reached and env[:should_exit] is not already set, it
    # is filled with a reason/message/source hash. The inner app is still
    # called either way.
    #
    class ToolErrorTracking < Base
      DEFAULT_MAX_FAILURES = 3

      def initialize(app, max_failures: DEFAULT_MAX_FAILURES)
        super(app)
        @max_failures = max_failures
        @total_tool_calls = 0
        @errors = Hash.new(0) # tool_name → count
      end

      def call(env)
        tally(env[:tool_results])

        limit_reached = @errors.any? { |_, count| count >= @max_failures }

        metadata = env[:metadata]
        metadata[:tool_calls] = @total_tool_calls
        metadata[:tool_errors] = @errors.dup
        metadata[:tool_error_limit_reached] = limit_reached

        # ||= keeps an exit request made by another middleware intact.
        env[:should_exit] ||= exit_request if limit_reached

        @app.call(env)
      end

      # Reset counts (e.g., between user turns).
      def reset!
        @errors.clear
        @total_tool_calls = 0
      end

      private

      # Adds one batch of [name, result] pairs to the counters. A result
      # counts as a failure when it is a Hash with a truthy :error entry.
      def tally(results)
        return unless results

        @total_tool_calls += results.size
        results.each do |name, result|
          next unless result.is_a?(Hash) && result[:error]

          @errors[name] += 1
        end
      end

      # Builds the should_exit payload naming the tool with the most failures.
      def exit_request
        failed_tool, fail_count = @errors.max_by { |_, count| count }
        {
          reason: "tool_error_limit_reached",
          message: "Tool '#{failed_tool}' has failed #{fail_count} times (limit: #{@max_failures}). Stopping.",
          source: "ToolErrorTracking",
        }
      end
    end
  end
end
|
|
65
|
-
|
|
66
|
-
test do
  require_relative "../../../spec/support/mock_provider"
  require_relative "../../../spec/support/mock_response"

  # Baseline pipeline env; any key can be replaced through +overrides+.
  def build_env(**overrides)
    defaults = {
      provider: MockProvider.new,
      model: nil,
      input: "test prompt",
      tools: [],
      messages: [],
      stream: nil,
      params: {},
      metadata: {},
      callbacks: {},
      tool_results: nil,
      streaming: false,
      should_exit: nil,
      pending_functions: []
    }
    defaults.merge(overrides)
  end

  # ToolErrorTracking with a limit of 3 around +app+ (or a stub app).
  def make_middleware(app = nil)
    app ||= ->(_env) { MockResponse.new(content: "tracked") }
    Brute::Middleware::ToolErrorTracking.new(app, max_failures: 3)
  end

  # Pushes one env through a fresh middleware and returns that env.
  def run_with(**overrides)
    build_env(**overrides).tap { |env| make_middleware.call(env) }
  end

  # +count+ consecutive fs_read failures, numbered "1", "2", ...
  def fs_read_failures(count)
    (1..count).map { |i| ["fs_read", { error: i.to_s }] }
  end

  it "passes the response through" do
    response = MockResponse.new(content: "tracked")
    app = ->(_env) { response }
    make_middleware(app).call(build_env).should == response
  end

  it "reports zero tool calls when tool_results is nil" do
    run_with(tool_results: nil)[:metadata][:tool_calls].should == 0
  end

  it "reports empty tool errors when tool_results is nil" do
    run_with(tool_results: nil)[:metadata][:tool_errors].should == {}
  end

  it "does not flag limit reached when tool_results is nil" do
    run_with(tool_results: nil)[:metadata][:tool_error_limit_reached].should.be.false
  end

  it "counts total tool calls from tool_results" do
    results = [["fs_read", { content: "data" }], ["shell", { output: "ok" }], ["fs_write", { success: true }]]
    run_with(tool_results: results)[:metadata][:tool_calls].should == 3
  end

  it "counts per-tool errors from results with error key" do
    results = [["fs_read", { error: "not found" }], ["fs_read", { error: "denied" }], ["shell", { output: "ok" }]]
    run_with(tool_results: results)[:metadata][:tool_errors].should == { "fs_read" => 2 }
  end

  it "sets tool_error_limit_reached when a tool hits max_failures" do
    env = run_with(tool_results: fs_read_failures(3))
    env[:metadata][:tool_error_limit_reached].should.be.true
  end

  it "does not flag below the threshold" do
    env = run_with(tool_results: fs_read_failures(2))
    env[:metadata][:tool_error_limit_reached].should.be.false
  end

  it "accumulates counts across multiple calls" do
    mw = make_middleware
    mw.call(build_env(tool_results: [["fs_read", { error: "fail" }]]))
    env2 = build_env(tool_results: [["fs_read", { error: "again" }], ["shell", { output: "ok" }]])
    mw.call(env2)
    env2[:metadata][:tool_calls].should == 3
  end

  it "clears counters on reset!" do
    mw = make_middleware
    mw.call(build_env(tool_results: [["fs_read", { error: "fail" }]]))
    mw.reset!
    env2 = build_env(tool_results: nil)
    mw.call(env2)
    env2[:metadata][:tool_calls].should == 0
  end

  it "sets should_exit reason when error limit reached" do
    env = run_with(tool_results: fs_read_failures(3))
    env[:should_exit][:reason].should == "tool_error_limit_reached"
  end

  it "sets should_exit source to ToolErrorTracking" do
    env = run_with(tool_results: fs_read_failures(3))
    env[:should_exit][:source].should == "ToolErrorTracking"
  end

  it "does not set should_exit below the threshold" do
    run_with(tool_results: fs_read_failures(2))[:should_exit].should.be.nil
  end

  it "does not overwrite should_exit if already set" do
    existing = { reason: "doom_loop_detected", message: "loop", source: "DoomLoopDetection" }
    env = run_with(tool_results: fs_read_failures(3), should_exit: existing)
    env[:should_exit][:reason].should == "doom_loop_detected"
  end
end
|
|
@@ -1,133 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "bundler/setup"
|
|
4
|
-
require "brute"
|
|
5
|
-
|
|
6
|
-
module Brute
  module Middleware
    # Post-call middleware that makes sure every pending tool call is backed
    # by an assistant message in env[:messages].
    #
    # Background (as described by the original author): a tool-only LLM
    # response can leave the assistant message carrying the tool_use blocks
    # out of the history, and the next request then fails with "unexpected
    # tool_use_id" because a tool_result points at a tool_use that is not
    # there.
    #
    # After the inner app returns, the guard gathers the pending tool calls:
    #
    #   1. from env[:pending_functions] when that list is non-empty;
    #   2. otherwise, when env[:streaming] is set, from the stream's
    #      pending_tool_calls (which are cleared once read).
    #
    # Any call whose id is not found in an assistant tool-call message is
    # collected into one synthetic assistant message appended to
    # env[:messages]. The response itself is always returned untouched.
    #
    class ToolUseGuard
      def initialize(app)
        @app = app
      end

      def call(env)
        @app.call(env).tap do
          pending = collect_tool_data(env)
          next if pending.empty?

          known_ids = covered_tool_ids(env[:messages])
          missing = pending.reject { |call| known_ids.include?(call[:id]) }
          inject_synthetic!(env[:messages], missing) unless missing.empty?
        end
      end

      private

      # Returns the pending tool calls as an array of { id:, name:, arguments: }
      # hashes built from env[:pending_functions], or the stream's recorded
      # calls as a fallback; [] when neither source has anything.
      def collect_tool_data(env)
        functions = env[:pending_functions]
        if functions && !functions.empty?
          return functions.map { |fn| { id: fn.id, name: fn.name, arguments: fn.arguments } }
        end
        return [] unless env[:streaming]

        stream = env[:stream]
        return [] unless stream&.respond_to?(:pending_tool_calls)

        # Copy first, then clear, so the same calls are not picked up twice.
        stream.pending_tool_calls.dup.tap { stream.clear_pending_tool_calls! }
      end

      # Set of tool_use ids already present on assistant tool-call messages.
      def covered_tool_ids(messages)
        assistant_calls = messages.select { |msg| msg.role.to_s == "assistant" && msg.tool_call? }
        ids = assistant_calls.flat_map do |msg|
          recorded = msg.extra.original_tool_calls || []
          recorded.map { |tool_call| tool_call["id"] }
        end
        ids.to_set
      end

      # Appends one empty-text assistant message carrying +uncovered+ both as
      # LLM objects (:tool_calls) and as raw tool_use hashes
      # (:original_tool_calls).
      def inject_synthetic!(messages, uncovered)
        extras = {
          tool_calls: uncovered.map do |call|
            LLM::Object.from(id: call[:id], name: call[:name], arguments: call[:arguments])
          end,
          original_tool_calls: uncovered.map do |call|
            { "type" => "tool_use", "id" => call[:id], "name" => call[:name], "input" => call[:arguments] || {} }
          end,
        }
        messages << LLM::Message.new(:assistant, "", extras)
      end
    end
  end
end
|
|
94
|
-
|
|
95
|
-
test do
  require_relative "../../../spec/support/mock_provider"
  require_relative "../../../spec/support/mock_response"

  # Baseline pipeline env; any key can be replaced through +overrides+.
  def build_env(**overrides)
    defaults = {
      provider: MockProvider.new,
      model: nil,
      input: "test prompt",
      tools: [],
      messages: [],
      stream: nil,
      params: {},
      metadata: {},
      callbacks: {},
      tool_results: nil,
      streaming: false,
      should_exit: nil,
      pending_functions: []
    }
    defaults.merge(overrides)
  end

  # ToolUseGuard wrapped around an app that always returns +response+.
  def guard_returning(response)
    Brute::Middleware::ToolUseGuard.new(->(_env) { response })
  end

  # A stand-in pending function for an fs_read call with id "toolu_1".
  def pending_fs_read
    pending_class = Struct.new(:id, :name, :arguments, keyword_init: true)
    pending_class.new(id: "toolu_1", name: "fs_read", arguments: { "path" => "test.rb" })
  end

  it "passes the response through when there are no pending functions" do
    response = MockResponse.new(content: "no tools")
    result = guard_returning(response).call(build_env(pending_functions: []))
    result.should == response
  end

  it "injects a synthetic assistant message when tool calls exist but assistant is missing" do
    middleware = guard_returning(MockResponse.new(content: ""))
    env = build_env(messages: [], pending_functions: [pending_fs_read])
    lambda { middleware.call(env) }.should.not.raise
  end

  it "creates one assistant message for uncovered tool calls" do
    middleware = guard_returning(MockResponse.new(content: ""))
    env = build_env(messages: [], pending_functions: [pending_fs_read])
    middleware.call(env)
    assistants = env[:messages].select { |m| m.role.to_s == "assistant" }
    assistants.size.should == 1
  end
end
|
|
@@ -1,124 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "bundler/setup"
|
|
4
|
-
require "brute"
|
|
5
|
-
|
|
6
|
-
module Brute
  module Middleware
    # Logs timing and token usage for every LLM call, and publishes timing
    # data in env[:metadata][:timing].
    #
    # The per-call duration covers everything the inner app does, so when
    # this middleware is installed outermost it reflects the full pipeline.
    # It also tracks wall-clock time since the current turn began, which
    # includes whatever happens between LLM calls (e.g. tool execution).
    #
    # A new turn is detected when env[:tool_results] is nil; at that point
    # the turn start time and the cumulative LLM time are reset. The call
    # counter is NOT reset per turn: it counts every call made through this
    # middleware instance.
    #
    # Stores in env[:metadata][:timing]:
    #   total_elapsed:     wall-clock since the turn began (includes tool gaps)
    #   total_llm_elapsed: cumulative time spent inside LLM calls this turn
    #   llm_call_count:    number of LLM calls seen by this instance so far
    #   last_call_elapsed: duration of the most recent LLM call
    #
    class Tracing < Base
      # @param app    the next middleware / app in the chain
      # @param logger an object responding to #debug and #info
      def initialize(app, logger:)
        super(app)
        @logger = logger
        @call_count = 0
        @total_llm_elapsed = 0.0
        @turn_start = nil
      end

      # Times the inner call, logs it, records timing metadata and returns
      # the inner app's response unchanged.
      def call(env)
        @call_count += 1

        # Detect new turn: tool_results is nil on the first pipeline call
        if env[:tool_results].nil?
          @turn_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
          @total_llm_elapsed = 0.0
        end

        messages = env[:messages]
        @logger.debug("[brute] LLM call ##{@call_count} (#{messages.size} messages in context)")

        start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        response = @app.call(env)
        now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        elapsed = now - start

        @total_llm_elapsed += elapsed

        # Fall back to '?' both when the response has no #usage and when
        # #usage (or its total) is nil, so the log never shows a blank count.
        usage = response.usage if response.respond_to?(:usage)
        tokens = usage&.total_tokens || '?'
        @logger.info("[brute] LLM response ##{@call_count}: #{tokens} tokens, #{elapsed.round(2)}s")

        env[:metadata][:timing] = {
          # @turn_start is only nil if the first call ever seen already had
          # tool_results; in that case measure from this call's start.
          total_elapsed: now - (@turn_start || start),
          total_llm_elapsed: @total_llm_elapsed,
          llm_call_count: @call_count,
          last_call_elapsed: elapsed
        }

        response
      end
    end
  end
end
|
|
67
|
-
|
|
68
|
-
test do
  require_relative "../../../spec/support/mock_provider"
  require_relative "../../../spec/support/mock_response"

  # Baseline pipeline env; any key can be replaced through +overrides+.
  def build_env(**overrides)
    defaults = {
      provider: MockProvider.new,
      model: nil,
      input: "test prompt",
      tools: [],
      messages: [],
      stream: nil,
      params: {},
      metadata: {},
      callbacks: {},
      tool_results: nil,
      streaming: false,
      should_exit: nil,
      pending_functions: []
    }
    defaults.merge(overrides)
  end

  # Tracing middleware around an app returning +response+, logging to +io+.
  def tracer_for(response, io = StringIO.new)
    inner_app = ->(_env) { response }
    Brute::Middleware::Tracing.new(inner_app, logger: Logger.new(io))
  end

  it "passes the response through unchanged" do
    response = MockResponse.new(content: "traced response")
    result = tracer_for(response).call(build_env(tool_results: nil))
    result.should == response
  end

  it "populates timing with llm_call_count" do
    middleware = tracer_for(MockResponse.new(content: "traced response"))
    env = build_env(tool_results: nil)
    middleware.call(env)
    env[:metadata][:timing][:llm_call_count].should == 1
  end

  it "populates timing with non-negative last_call_elapsed" do
    middleware = tracer_for(MockResponse.new(content: "traced response"))
    env = build_env(tool_results: nil)
    middleware.call(env)
    (env[:metadata][:timing][:last_call_elapsed] >= 0).should.be.true
  end

  it "accumulates call count across multiple calls" do
    middleware = tracer_for(MockResponse.new(content: "traced response"))
    env = build_env(tool_results: nil)
    middleware.call(env)
    env[:tool_results] = [["read", {}]]
    2.times { middleware.call(env) }
    env[:metadata][:timing][:llm_call_count].should == 3
  end

  it "logs LLM call and response messages" do
    log_output = StringIO.new
    middleware = tracer_for(MockResponse.new(content: "traced response"), log_output)
    middleware.call(build_env(tool_results: nil))
    log_output.string.should =~ /LLM call #1/
  end
end
|
data/lib/brute/middleware.rb
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
require_relative 'middleware/base'
|
|
2
|
-
require_relative 'middleware/llm_call'
|
|
3
|
-
require_relative 'middleware/retry'
|
|
4
|
-
require_relative 'middleware/doom_loop_detection'
|
|
5
|
-
require_relative 'middleware/token_tracking'
|
|
6
|
-
require_relative 'middleware/compaction_check'
|
|
7
|
-
require_relative 'middleware/session_persistence'
|
|
8
|
-
require_relative 'middleware/message_tracking'
|
|
9
|
-
require_relative 'middleware/tracing'
|
|
10
|
-
require_relative 'middleware/tool_error_tracking'
|
|
11
|
-
require_relative 'middleware/reasoning_normalizer'
|
|
12
|
-
require_relative "middleware/tool_use_guard"
|
|
13
|
-
require_relative "middleware/otel"
|
|
14
|
-
|
|
15
|
-
module Brute
  # Namespace for the middleware classes loaded by the require_relative
  # lines at the top of this file.
  module Middleware; end
end
|
|
@@ -1,105 +0,0 @@
|
|
|
1
|
-
module Brute
  class Orchestrator
    # Executes one batch of pending tool calls and reports progress through
    # the callbacks found in env[:callbacks].
    #
    # +pending+ is a list of [tool, error] pairs. Pairs with a truthy error
    # are not executed; their error object is appended to the results after
    # the executable tools have run. Callbacks used (all optional):
    #
    #   :on_tool_call_start  called once with [{ name:, arguments: }, ...]
    #   :on_tool_result      called per result with (name, result_value(result))
    #   :on_question         stored in Thread.current[:on_question] before
    #                        each tool runs
    #
    # NOTE(review): +result_value+ is called here but is not defined in this
    # class as shown; presumably it is provided elsewhere — confirm. It is
    # only reached when an :on_tool_result callback is present.
    class Turn
      # @param env     [Hash] pipeline env; only env[:callbacks] is read
      # @param pending [Array<Array>] [tool, error] pairs
      def initialize(env:, pending:)
        @env = env
        @pending = pending
      end

      # Announces the batch, runs the executable tools, then appends the
      # pre-failed entries.
      #
      # @return [Array] results of executed tools (questions first), followed
      #   by the error objects of the non-executable pairs
      def perform
        on_start = @env.dig(:callbacks, :on_tool_call_start)
        on_start&.call(
          @pending.map { |tool, _| { name: tool.name, arguments: tool.arguments } }
        )

        execute_tool_calls.tap do |results|
          on_result = @env.dig(:callbacks, :on_tool_result)
          errors.each do |_, err|
            on_result&.call(err.name, result_value(err))
            results << err
          end
        end
      end

      # Pairs whose second element (the error) is truthy.
      def errors = @pending.select { |_, err| err }

      # Tools from the pairs that carry no error.
      def executable = @pending.reject { |_, err| err }.map(&:first)

      # Runs every executable tool and returns their results.
      #
      # Questions block execution — they must complete before other tools
      # run, since the LLM may need the answer to inform subsequent work.
      # Question tools therefore run first (sequentially); the remaining
      # tools run concurrently when there is more than one of them.
      def execute_tool_calls
        runnable = executable
        return [] if runnable.empty?

        questions, others = runnable.partition { _1.name == "question" }

        results = []
        results.concat(execute_sequential(questions)) if questions.any?
        results.concat(others.size <= 1 ? execute_sequential(others) : execute_parallel(others))
        results
      end

      # Runs the given tool calls one after another on the current fiber and
      # returns their results in order.
      def execute_sequential(functions)
        on_result = @env.dig(:callbacks, :on_tool_result)
        on_question = @env.dig(:callbacks, :on_question)

        functions.map do |fn|
          Thread.current[:on_question] = on_question
          result = fn.call
          on_result&.call(fn.name, result_value(result))
          result
        end
      end

      # Runs the given tool calls concurrently via Async::Barrier and returns
      # their results in the same order as +functions+.
      #
      # Each tool runs in its own fiber (Thread.current[] storage is
      # fiber-local, so every fiber gets its own :on_question slot). The
      # original author notes that file-mutating tools are safe here because
      # they go through FileMutationQueue, whose Mutex is
      # fiber-scheduler-aware.
      #
      # The barrier is kept in @barrier while the batch is in flight and is
      # stopped and cleared afterwards.
      #
      # The enclosing task is waited on explicitly: when this method is
      # called from inside an already-running reactor, `Async do ... end`
      # returns as soon as the child task first yields, which would hand
      # back a results array with unfilled slots. Waiting also means a
      # failure inside the task is raised to the caller instead of being
      # silently dropped.
      def execute_parallel(functions)
        on_result = @env.dig(:callbacks, :on_tool_result)
        on_question = @env.dig(:callbacks, :on_question)

        results = Array.new(functions.size)

        task = Async do
          @barrier = Async::Barrier.new

          functions.each_with_index do |fn, i|
            @barrier.async do
              Thread.current[:on_question] = on_question
              result = fn.call
              results[i] = result
              on_result&.call(result.name, result_value(result))
            end
          end

          @barrier.wait
        ensure
          @barrier&.stop
          @barrier = nil
        end
        task.wait

        results
      end
    end
  end
end
|