brute 0.4.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/brute/agent.rb +14 -0
- data/lib/brute/diff.rb +24 -0
- data/lib/brute/loop/agent_stream.rb +118 -0
- data/lib/brute/loop/agent_turn.rb +520 -0
- data/lib/brute/{compactor.rb → loop/compactor.rb} +2 -0
- data/lib/brute/{doom_loop.rb → loop/doom_loop.rb} +2 -0
- data/lib/brute/loop/step.rb +332 -0
- data/lib/brute/loop/tool_call_step.rb +90 -0
- data/lib/brute/middleware/compaction_check.rb +70 -23
- data/lib/brute/middleware/doom_loop_detection.rb +110 -7
- data/lib/brute/middleware/llm_call.rb +88 -1
- data/lib/brute/middleware/message_tracking.rb +140 -10
- data/lib/brute/middleware/otel/span.rb +32 -2
- data/lib/brute/middleware/otel/token_usage.rb +38 -0
- data/lib/brute/middleware/otel/tool_calls.rb +30 -1
- data/lib/brute/middleware/otel/tool_results.rb +29 -1
- data/lib/brute/middleware/otel.rb +5 -0
- data/lib/brute/middleware/reasoning_normalizer.rb +94 -0
- data/lib/brute/middleware/retry.rb +113 -1
- data/lib/brute/middleware/session_persistence.rb +46 -3
- data/lib/brute/middleware/token_tracking.rb +78 -0
- data/lib/brute/middleware/tool_error_tracking.rb +128 -1
- data/lib/brute/middleware/tool_use_guard.rb +64 -28
- data/lib/brute/middleware/tracing.rb +63 -2
- data/lib/brute/middleware.rb +18 -0
- data/lib/brute/orchestrator/turn.rb +105 -0
- data/lib/brute/patches/buffer_nil_guard.rb +5 -0
- data/lib/brute/pipeline.rb +86 -7
- data/lib/brute/prompts/build_switch.rb +29 -0
- data/lib/brute/prompts/environment.rb +43 -0
- data/lib/brute/prompts/identity.rb +29 -0
- data/lib/brute/prompts/instructions.rb +21 -0
- data/lib/brute/prompts/max_steps.rb +25 -0
- data/lib/brute/prompts/plan_reminder.rb +25 -0
- data/lib/brute/prompts/skills.rb +13 -0
- data/lib/brute/prompts.rb +28 -0
- data/lib/brute/providers/ollama.rb +135 -0
- data/lib/brute/providers/opencode_go.rb +5 -0
- data/lib/brute/providers/opencode_zen.rb +7 -2
- data/lib/brute/providers/shell.rb +2 -2
- data/lib/brute/providers/shell_response.rb +7 -2
- data/lib/brute/providers.rb +62 -0
- data/lib/brute/queue/base_queue.rb +222 -0
- data/lib/brute/{file_mutation_queue.rb → queue/file_mutation_queue.rb} +28 -26
- data/lib/brute/queue/parallel_queue.rb +66 -0
- data/lib/brute/queue/sequential_queue.rb +63 -0
- data/lib/brute/{message_store.rb → store/message_store.rb} +155 -62
- data/lib/brute/store/session.rb +106 -0
- data/lib/brute/{snapshot_store.rb → store/snapshot_store.rb} +2 -0
- data/lib/brute/{todo_store.rb → store/todo_store.rb} +2 -0
- data/lib/brute/system_prompt.rb +101 -0
- data/lib/brute/tools/delegate.rb +59 -0
- data/lib/brute/tools/fs_patch.rb +54 -2
- data/lib/brute/tools/fs_read.rb +5 -0
- data/lib/brute/tools/fs_remove.rb +7 -2
- data/lib/brute/tools/fs_search.rb +5 -0
- data/lib/brute/tools/fs_undo.rb +7 -2
- data/lib/brute/tools/fs_write.rb +40 -2
- data/lib/brute/tools/net_fetch.rb +5 -0
- data/lib/brute/tools/question.rb +5 -0
- data/lib/brute/tools/shell.rb +5 -0
- data/lib/brute/tools/todo_read.rb +6 -1
- data/lib/brute/tools/todo_write.rb +6 -1
- data/lib/brute/tools.rb +31 -0
- data/lib/brute/version.rb +1 -1
- data/lib/brute.rb +40 -204
- metadata +31 -20
- data/lib/brute/agent_stream.rb +0 -63
- data/lib/brute/hooks.rb +0 -84
- data/lib/brute/orchestrator.rb +0 -391
- data/lib/brute/session.rb +0 -161
|
@@ -1,29 +1,39 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
3
6
|
module Brute
|
|
4
7
|
module Middleware
|
|
5
8
|
# Detects when the agent is stuck repeating tool call patterns and injects
|
|
6
|
-
# a corrective warning into the
|
|
9
|
+
# a corrective warning into the message history before the next LLM call.
|
|
7
10
|
#
|
|
8
11
|
# Runs PRE-call: inspects the conversation history for repeating tool call
|
|
9
|
-
# patterns. If detected,
|
|
10
|
-
#
|
|
12
|
+
# patterns. If detected, appends a warning message so the LLM sees it as
|
|
13
|
+
# input alongside the normal tool results.
|
|
11
14
|
#
|
|
12
15
|
class DoomLoopDetection < Base
|
|
13
16
|
def initialize(app, threshold: 3)
|
|
14
17
|
super(app)
|
|
15
|
-
@detector = Brute::DoomLoopDetector.new(threshold: threshold)
|
|
18
|
+
@detector = Brute::Loop::DoomLoopDetector.new(threshold: threshold)
|
|
16
19
|
end
|
|
17
20
|
|
|
18
21
|
def call(env)
|
|
19
|
-
|
|
20
|
-
messages = ctx.messages.to_a
|
|
22
|
+
messages = env[:messages]
|
|
21
23
|
|
|
22
24
|
if (reps = @detector.detect(messages))
|
|
23
25
|
warning = @detector.warning_message(reps)
|
|
24
26
|
# Inject the warning as a user message so the LLM sees it
|
|
25
|
-
|
|
27
|
+
env[:messages] << LLM::Message.new(:user, warning)
|
|
26
28
|
env[:metadata][:doom_loop_detected] = reps
|
|
29
|
+
|
|
30
|
+
# Signal the agent loop to exit after this LLM call completes.
|
|
31
|
+
# First-writer-wins: don't overwrite if another middleware already set it.
|
|
32
|
+
env[:should_exit] ||= {
|
|
33
|
+
reason: "doom_loop_detected",
|
|
34
|
+
message: "Agent is stuck repeating the same tool calls (#{reps} repetitions).",
|
|
35
|
+
source: "DoomLoopDetection",
|
|
36
|
+
}
|
|
27
37
|
end
|
|
28
38
|
|
|
29
39
|
@app.call(env)
|
|
@@ -31,3 +41,96 @@ module Brute
|
|
|
31
41
|
end
|
|
32
42
|
end
|
|
33
43
|
end
|
|
44
|
+
|
|
45
|
+
test do
|
|
46
|
+
require_relative "../../../spec/support/mock_provider"
|
|
47
|
+
require_relative "../../../spec/support/mock_response"
|
|
48
|
+
|
|
49
|
+
def build_env(**overrides)
|
|
50
|
+
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
51
|
+
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
52
|
+
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
FakeFunc = Struct.new(:name, :arguments, keyword_init: true)
|
|
56
|
+
|
|
57
|
+
def assistant_msg_with_functions(function_list)
|
|
58
|
+
msg = LLM::Message.new(:assistant, "tool msg", {})
|
|
59
|
+
msg.define_singleton_method(:functions) { function_list }
|
|
60
|
+
msg
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
it "passes through when no doom loop is detected" do
|
|
64
|
+
inner_app = ->(_env) { MockResponse.new(content: "loop check") }
|
|
65
|
+
middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
|
|
66
|
+
env = build_env
|
|
67
|
+
middleware.call(env)
|
|
68
|
+
env[:metadata][:doom_loop_detected].should.be.nil
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
it "detects consecutive identical tool calls" do
|
|
72
|
+
inner_app = ->(_env) { MockResponse.new(content: "loop check") }
|
|
73
|
+
fn = FakeFunc.new(name: "fs_read", arguments: '{"path":"x.rb"}')
|
|
74
|
+
messages = 4.times.map { assistant_msg_with_functions([fn]) }
|
|
75
|
+
middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
|
|
76
|
+
env = build_env(messages: messages)
|
|
77
|
+
middleware.call(env)
|
|
78
|
+
env[:metadata][:doom_loop_detected].should.not.be.nil
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
it "does not trigger below the threshold" do
|
|
82
|
+
inner_app = ->(_env) { MockResponse.new(content: "loop check") }
|
|
83
|
+
fn = FakeFunc.new(name: "fs_read", arguments: '{"path":"x.rb"}')
|
|
84
|
+
messages = 2.times.map { assistant_msg_with_functions([fn]) }
|
|
85
|
+
middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
|
|
86
|
+
env = build_env(messages: messages)
|
|
87
|
+
middleware.call(env)
|
|
88
|
+
env[:metadata][:doom_loop_detected].should.be.nil
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
it "sets should_exit reason when doom loop detected" do
|
|
92
|
+
inner_app = ->(_env) { MockResponse.new(content: "loop check") }
|
|
93
|
+
fn = FakeFunc.new(name: "fs_read", arguments: '{"path":"x.rb"}')
|
|
94
|
+
messages = 4.times.map { assistant_msg_with_functions([fn]) }
|
|
95
|
+
middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
|
|
96
|
+
env = build_env(messages: messages)
|
|
97
|
+
middleware.call(env)
|
|
98
|
+
env[:should_exit][:reason].should == "doom_loop_detected"
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
it "does not set should_exit when no loop detected" do
|
|
102
|
+
inner_app = ->(_env) { MockResponse.new(content: "loop check") }
|
|
103
|
+
middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
|
|
104
|
+
env = build_env
|
|
105
|
+
middleware.call(env)
|
|
106
|
+
env[:should_exit].should.be.nil
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
it "does not overwrite should_exit if already set" do
|
|
110
|
+
inner_app = ->(_env) { MockResponse.new(content: "loop check") }
|
|
111
|
+
fn = FakeFunc.new(name: "fs_read", arguments: '{"path":"x.rb"}')
|
|
112
|
+
messages = 4.times.map { assistant_msg_with_functions([fn]) }
|
|
113
|
+
middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
|
|
114
|
+
existing = { reason: "other", message: "earlier", source: "Other" }
|
|
115
|
+
env = build_env(messages: messages, should_exit: existing)
|
|
116
|
+
middleware.call(env)
|
|
117
|
+
env[:should_exit][:reason].should == "other"
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
it "appends a warning message when loop detected" do
|
|
121
|
+
inner_app = ->(_env) { MockResponse.new(content: "loop check") }
|
|
122
|
+
fn = FakeFunc.new(name: "fs_read", arguments: '{"path":"x.rb"}')
|
|
123
|
+
messages = 4.times.map { assistant_msg_with_functions([fn]) }
|
|
124
|
+
middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
|
|
125
|
+
env = build_env(messages: messages)
|
|
126
|
+
original_count = env[:messages].size
|
|
127
|
+
middleware.call(env)
|
|
128
|
+
env[:messages].size.should == original_count + 1
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
it "generates warning message with repetition count" do
|
|
132
|
+
detector = Brute::Loop::DoomLoopDetector.new(threshold: 3)
|
|
133
|
+
msg = detector.warning_message(5)
|
|
134
|
+
msg.should =~ /5 times/
|
|
135
|
+
end
|
|
136
|
+
end
|
|
@@ -1,17 +1,35 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
3
6
|
module Brute
|
|
4
7
|
module Middleware
|
|
5
8
|
# The terminal "app" in the pipeline — performs the actual LLM call.
|
|
6
9
|
#
|
|
10
|
+
# Builds a fresh LLM::Context per call from env[:messages], makes the
|
|
11
|
+
# call, extracts new messages back into env[:messages], and stashes
|
|
12
|
+
# pending functions in env[:pending_functions].
|
|
13
|
+
#
|
|
7
14
|
# When streaming, on_content fires incrementally via AgentStream.
|
|
8
15
|
# When not streaming, fires on_content post-hoc with the full text.
|
|
9
16
|
#
|
|
10
17
|
class LLMCall
|
|
11
18
|
def call(env)
|
|
12
|
-
ctx = env
|
|
19
|
+
ctx = build_context(env)
|
|
20
|
+
|
|
21
|
+
# Load existing conversation history into the ephemeral context
|
|
22
|
+
ctx.messages.concat(env[:messages])
|
|
23
|
+
|
|
13
24
|
response = ctx.talk(env[:input])
|
|
14
25
|
|
|
26
|
+
# Extract new messages appended by talk() and store them
|
|
27
|
+
new_messages = ctx.messages.to_a.drop(env[:messages].size)
|
|
28
|
+
env[:messages].concat(new_messages)
|
|
29
|
+
|
|
30
|
+
# Stash pending functions for the agent loop
|
|
31
|
+
env[:pending_functions] = ctx.functions.to_a
|
|
32
|
+
|
|
15
33
|
# Only fire on_content post-hoc when NOT streaming
|
|
16
34
|
# (streaming delivers chunks incrementally via AgentStream)
|
|
17
35
|
unless env[:streaming]
|
|
@@ -26,6 +44,14 @@ module Brute
|
|
|
26
44
|
|
|
27
45
|
private
|
|
28
46
|
|
|
47
|
+
def build_context(env)
|
|
48
|
+
params = {}
|
|
49
|
+
params[:tools] = env[:tools] if env[:tools]&.any?
|
|
50
|
+
params[:stream] = env[:stream] if env[:stream]
|
|
51
|
+
params[:model] = env[:model] if env[:model]
|
|
52
|
+
LLM::Context.new(env[:provider], **params)
|
|
53
|
+
end
|
|
54
|
+
|
|
29
55
|
# Safely extract text content from an LLM response.
|
|
30
56
|
# Returns nil when the response contains only tool calls (no assistant text),
|
|
31
57
|
# which causes LLM::Contract::Completion#content to raise NoMethodError
|
|
@@ -39,3 +65,64 @@ module Brute
|
|
|
39
65
|
end
|
|
40
66
|
end
|
|
41
67
|
end
|
|
68
|
+
|
|
69
|
+
test do
|
|
70
|
+
require_relative "../../../spec/support/mock_provider"
|
|
71
|
+
require_relative "../../../spec/support/mock_response"
|
|
72
|
+
|
|
73
|
+
def build_env(**overrides)
|
|
74
|
+
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
75
|
+
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
76
|
+
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
it "calls the provider and returns a response" do
|
|
80
|
+
provider = MockProvider.new
|
|
81
|
+
middleware = Brute::Middleware::LLMCall.new
|
|
82
|
+
env = build_env(provider: provider, input: "hello", streaming: false)
|
|
83
|
+
response = middleware.call(env)
|
|
84
|
+
response.should.not.be.nil
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
it "records a call on the provider" do
|
|
88
|
+
provider = MockProvider.new
|
|
89
|
+
middleware = Brute::Middleware::LLMCall.new
|
|
90
|
+
env = build_env(provider: provider, input: "hello", streaming: false)
|
|
91
|
+
middleware.call(env)
|
|
92
|
+
provider.calls.size.should == 1
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
it "appends new messages to env[:messages]" do
|
|
96
|
+
provider = MockProvider.new
|
|
97
|
+
middleware = Brute::Middleware::LLMCall.new
|
|
98
|
+
env = build_env(provider: provider, input: "hello", streaming: false)
|
|
99
|
+
middleware.call(env)
|
|
100
|
+
env[:messages].should.not.be.empty
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
it "populates env[:pending_functions] as an Array" do
|
|
104
|
+
provider = MockProvider.new
|
|
105
|
+
middleware = Brute::Middleware::LLMCall.new
|
|
106
|
+
env = build_env(provider: provider, input: "hello", streaming: false)
|
|
107
|
+
middleware.call(env)
|
|
108
|
+
env[:pending_functions].should.be.kind_of(Array)
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
it "does not fire on_content callback when streaming" do
|
|
112
|
+
provider = MockProvider.new
|
|
113
|
+
middleware = Brute::Middleware::LLMCall.new
|
|
114
|
+
called = false
|
|
115
|
+
env = build_env(provider: provider, input: "hi", streaming: true, callbacks: { on_content: ->(_) { called = true } })
|
|
116
|
+
middleware.call(env)
|
|
117
|
+
called.should.be.false
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
it "preserves existing messages across calls" do
|
|
121
|
+
provider = MockProvider.new
|
|
122
|
+
middleware = Brute::Middleware::LLMCall.new
|
|
123
|
+
existing = LLM::Message.new(:user, "previous")
|
|
124
|
+
env = build_env(provider: provider, input: "hello", streaming: false, messages: [existing])
|
|
125
|
+
middleware.call(env)
|
|
126
|
+
env[:messages].first.should == existing
|
|
127
|
+
end
|
|
128
|
+
end
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
3
6
|
module Brute
|
|
4
7
|
module Middleware
|
|
5
8
|
# Records every LLM exchange into a MessageStore in the OpenCode
|
|
@@ -15,7 +18,7 @@ module Brute
|
|
|
15
18
|
# corresponding tool parts to "completed" (or "error").
|
|
16
19
|
#
|
|
17
20
|
# The middleware also stores itself in env[:message_tracking] so the
|
|
18
|
-
#
|
|
21
|
+
# agent loop can access the current assistant message ID for callbacks.
|
|
19
22
|
#
|
|
20
23
|
class MessageTracking < Base
|
|
21
24
|
attr_reader :store
|
|
@@ -111,8 +114,7 @@ module Brute
|
|
|
111
114
|
end
|
|
112
115
|
|
|
113
116
|
def record_tool_calls(env)
|
|
114
|
-
|
|
115
|
-
functions = ctx.functions
|
|
117
|
+
functions = env[:pending_functions]
|
|
116
118
|
return if functions.nil? || functions.empty?
|
|
117
119
|
|
|
118
120
|
functions.each do |fn|
|
|
@@ -165,14 +167,11 @@ module Brute
|
|
|
165
167
|
# ── Helpers ────────────────────────────────────────────────────
|
|
166
168
|
|
|
167
169
|
# Resolve the actual model used for the request.
|
|
168
|
-
# Prefers
|
|
169
|
-
#
|
|
170
|
+
# Prefers env[:model] (set by AgentTurn) and falls back to the
|
|
171
|
+
# provider's default_model.
|
|
170
172
|
def resolve_model_name(env)
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
ctx_model = ctx.instance_variable_get(:@params)&.dig(:model)
|
|
174
|
-
return ctx_model.to_s if ctx_model
|
|
175
|
-
end
|
|
173
|
+
model = env[:model]
|
|
174
|
+
return model.to_s if model
|
|
176
175
|
|
|
177
176
|
# Fall back to provider default
|
|
178
177
|
env[:provider]&.respond_to?(:default_model) ? env[:provider].default_model.to_s : nil
|
|
@@ -207,3 +206,134 @@ module Brute
|
|
|
207
206
|
end
|
|
208
207
|
end
|
|
209
208
|
end
|
|
209
|
+
|
|
210
|
+
test do
|
|
211
|
+
require_relative "../../../spec/support/mock_provider"
|
|
212
|
+
require_relative "../../../spec/support/mock_response"
|
|
213
|
+
require "tmpdir"
|
|
214
|
+
require "fileutils"
|
|
215
|
+
|
|
216
|
+
def build_env(**overrides)
|
|
217
|
+
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
218
|
+
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
219
|
+
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
def with_tracking
|
|
223
|
+
tmpdir = Dir.mktmpdir("brute_test_")
|
|
224
|
+
store = Brute::Store::MessageStore.new(session_id: "test-session", dir: tmpdir)
|
|
225
|
+
response = MockResponse.new(content: "Hello from the LLM")
|
|
226
|
+
inner_app = ->(_env) { response }
|
|
227
|
+
middleware = Brute::Middleware::MessageTracking.new(inner_app, store: store)
|
|
228
|
+
yield middleware, store, response
|
|
229
|
+
ensure
|
|
230
|
+
FileUtils.rm_rf(tmpdir)
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
it "records a user message on first call of a turn" do
|
|
234
|
+
with_tracking do |mw, store, _|
|
|
235
|
+
mw.call(build_env(input: "What is Ruby?", tool_results: nil))
|
|
236
|
+
user_msg = store.messages.find { |m| m[:info][:role] == "user" }
|
|
237
|
+
user_msg[:parts][0][:text].should == "What is Ruby?"
|
|
238
|
+
end
|
|
239
|
+
end
|
|
240
|
+
|
|
241
|
+
it "records only one user message per turn" do
|
|
242
|
+
with_tracking do |mw, store, _|
|
|
243
|
+
env = build_env(input: "Hello", tool_results: nil)
|
|
244
|
+
mw.call(env)
|
|
245
|
+
env[:tool_results] = [["read", "contents"]]
|
|
246
|
+
mw.call(env)
|
|
247
|
+
store.messages.select { |m| m[:info][:role] == "user" }.size.should == 1
|
|
248
|
+
end
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
it "records an assistant message after LLM call" do
|
|
252
|
+
with_tracking do |mw, store, _|
|
|
253
|
+
mw.call(build_env(input: "Hello", tool_results: nil))
|
|
254
|
+
asst = store.messages.find { |m| m[:info][:role] == "assistant" }
|
|
255
|
+
asst.should.not.be.nil
|
|
256
|
+
end
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
it "captures text content as a text part" do
|
|
260
|
+
with_tracking do |mw, store, _|
|
|
261
|
+
mw.call(build_env(input: "Hello", tool_results: nil))
|
|
262
|
+
asst = store.messages.find { |m| m[:info][:role] == "assistant" }
|
|
263
|
+
text_parts = asst[:parts].select { |p| p[:type] == "text" }
|
|
264
|
+
text_parts[0][:text].should == "Hello from the LLM"
|
|
265
|
+
end
|
|
266
|
+
end
|
|
267
|
+
|
|
268
|
+
it "captures token usage from response" do
|
|
269
|
+
with_tracking do |mw, store, _|
|
|
270
|
+
mw.call(build_env(input: "Hello", tool_results: nil))
|
|
271
|
+
asst = store.messages.find { |m| m[:info][:role] == "assistant" }
|
|
272
|
+
asst[:info][:tokens][:input].should == 100
|
|
273
|
+
end
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
it "records tool calls as tool parts in running state" do
|
|
277
|
+
with_tracking do |mw, store, _|
|
|
278
|
+
fn = Struct.new(:id, :name, :arguments, keyword_init: true).new(id: "call_001", name: "read", arguments: { file_path: "/test" })
|
|
279
|
+
mw.call(build_env(input: "Read the file", tool_results: nil, pending_functions: [fn]))
|
|
280
|
+
asst = store.messages.find { |m| m[:info][:role] == "assistant" }
|
|
281
|
+
tool_parts = asst[:parts].select { |p| p[:type] == "tool" }
|
|
282
|
+
tool_parts[0][:state][:status].should == "running"
|
|
283
|
+
end
|
|
284
|
+
end
|
|
285
|
+
|
|
286
|
+
it "updates tool parts when results arrive" do
|
|
287
|
+
with_tracking do |mw, store, _|
|
|
288
|
+
fn = Struct.new(:id, :name, :arguments, keyword_init: true).new(id: "call_001", name: "read", arguments: { file_path: "/test" })
|
|
289
|
+
env = build_env(input: "Read the file", tool_results: nil, pending_functions: [fn])
|
|
290
|
+
mw.call(env)
|
|
291
|
+
env[:pending_functions] = []
|
|
292
|
+
env[:tool_results] = [["read", "file contents here"]]
|
|
293
|
+
mw.call(env)
|
|
294
|
+
first_asst = store.messages.find { |m| m[:info][:role] == "assistant" }
|
|
295
|
+
tool_part = first_asst[:parts].find { |p| p[:type] == "tool" }
|
|
296
|
+
tool_part[:state][:status].should == "completed"
|
|
297
|
+
end
|
|
298
|
+
end
|
|
299
|
+
|
|
300
|
+
it "records provider default_model when no override" do
|
|
301
|
+
with_tracking do |mw, store, _|
|
|
302
|
+
mw.call(build_env(input: "Hello", tool_results: nil))
|
|
303
|
+
asst = store.messages.find { |m| m[:info][:role] == "assistant" }
|
|
304
|
+
asst[:info][:modelID].should == "mock-model"
|
|
305
|
+
end
|
|
306
|
+
end
|
|
307
|
+
|
|
308
|
+
it "records overridden model when env[:model] is set" do
|
|
309
|
+
with_tracking do |mw, store, _|
|
|
310
|
+
mw.call(build_env(input: "Hello", tool_results: nil, model: "custom-haiku"))
|
|
311
|
+
asst = store.messages.find { |m| m[:info][:role] == "assistant" }
|
|
312
|
+
asst[:info][:modelID].should == "custom-haiku"
|
|
313
|
+
end
|
|
314
|
+
end
|
|
315
|
+
|
|
316
|
+
it "stores itself in env[:message_tracking]" do
|
|
317
|
+
with_tracking do |mw, _, _|
|
|
318
|
+
env = build_env(input: "Hello", tool_results: nil)
|
|
319
|
+
mw.call(env)
|
|
320
|
+
env[:message_tracking].should == mw
|
|
321
|
+
end
|
|
322
|
+
end
|
|
323
|
+
|
|
324
|
+
it "returns the inner app response unchanged" do
|
|
325
|
+
with_tracking do |mw, _, response|
|
|
326
|
+
result = mw.call(build_env(input: "Hello", tool_results: nil))
|
|
327
|
+
result.should == response
|
|
328
|
+
end
|
|
329
|
+
end
|
|
330
|
+
|
|
331
|
+
it "adds a step-finish part to assistant messages" do
|
|
332
|
+
with_tracking do |mw, store, _|
|
|
333
|
+
mw.call(build_env(input: "Hello", tool_results: nil))
|
|
334
|
+
asst = store.messages.find { |m| m[:info][:role] == "assistant" }
|
|
335
|
+
step_finish = asst[:parts].find { |p| p[:type] == "step-finish" }
|
|
336
|
+
step_finish[:reason].should == "stop"
|
|
337
|
+
end
|
|
338
|
+
end
|
|
339
|
+
end
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
3
6
|
module Brute
|
|
4
7
|
module Middleware
|
|
5
8
|
module OTel
|
|
@@ -23,13 +26,13 @@ module Brute
|
|
|
23
26
|
return @app.call(env) unless defined?(::OpenTelemetry::SDK)
|
|
24
27
|
|
|
25
28
|
provider_name = provider_type(env[:provider])
|
|
26
|
-
model =
|
|
29
|
+
model = env[:model] || (env[:provider].default_model rescue nil)
|
|
27
30
|
span_name = model ? "llm.call #{model}" : "llm.call"
|
|
28
31
|
|
|
29
32
|
attributes = {
|
|
30
33
|
"brute.provider" => provider_name,
|
|
31
34
|
"brute.streaming" => !!env[:streaming],
|
|
32
|
-
"brute.context_messages" => env[:
|
|
35
|
+
"brute.context_messages" => env[:messages].size,
|
|
33
36
|
}
|
|
34
37
|
attributes["brute.model"] = model.to_s if model
|
|
35
38
|
attributes["brute.session_id"] = env[:metadata][:session_id].to_s if env.dig(:metadata, :session_id)
|
|
@@ -73,3 +76,30 @@ module Brute
|
|
|
73
76
|
end
|
|
74
77
|
end
|
|
75
78
|
end
|
|
79
|
+
|
|
80
|
+
test do
|
|
81
|
+
require_relative "../../../../spec/support/mock_provider"
|
|
82
|
+
require_relative "../../../../spec/support/mock_response"
|
|
83
|
+
|
|
84
|
+
def build_env(**overrides)
|
|
85
|
+
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
86
|
+
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
87
|
+
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
it "passes through when OpenTelemetry::SDK is not defined" do
|
|
91
|
+
response = MockResponse.new(content: "hello from LLM")
|
|
92
|
+
middleware = Brute::Middleware::OTel::Span.new(->(_env) { response })
|
|
93
|
+
env = build_env
|
|
94
|
+
result = middleware.call(env)
|
|
95
|
+
result.should == response
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
it "env[:span] is nil when OTel is not defined" do
|
|
99
|
+
response = MockResponse.new(content: "hello from LLM")
|
|
100
|
+
middleware = Brute::Middleware::OTel::Span.new(->(_env) { response })
|
|
101
|
+
env = build_env
|
|
102
|
+
middleware.call(env)
|
|
103
|
+
env[:span].should.be.nil
|
|
104
|
+
end
|
|
105
|
+
end
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
3
6
|
module Brute
|
|
4
7
|
module Middleware
|
|
5
8
|
module OTel
|
|
@@ -28,3 +31,38 @@ module Brute
|
|
|
28
31
|
end
|
|
29
32
|
end
|
|
30
33
|
end
|
|
34
|
+
|
|
35
|
+
test do
|
|
36
|
+
require_relative "../../../../spec/support/mock_provider"
|
|
37
|
+
require_relative "../../../../spec/support/mock_response"
|
|
38
|
+
|
|
39
|
+
def build_env(**overrides)
|
|
40
|
+
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
41
|
+
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
42
|
+
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def make_response
|
|
46
|
+
MockResponse.new(content: "hello",
|
|
47
|
+
usage: LLM::Usage.new(input_tokens: 100, output_tokens: 50, reasoning_tokens: 10, total_tokens: 160))
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
it "passes the response through unchanged" do
|
|
51
|
+
response = make_response
|
|
52
|
+
middleware = Brute::Middleware::OTel::TokenUsage.new(->(_env) { response })
|
|
53
|
+
result = middleware.call(build_env)
|
|
54
|
+
result.should == response
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
it "passes through without error when span is nil" do
|
|
58
|
+
response = make_response
|
|
59
|
+
middleware = Brute::Middleware::OTel::TokenUsage.new(->(_env) { response })
|
|
60
|
+
lambda { middleware.call(build_env) }.should.not.raise
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
it "handles a response without usage gracefully" do
|
|
64
|
+
no_usage = Object.new
|
|
65
|
+
middleware = Brute::Middleware::OTel::TokenUsage.new(->(_env) { no_usage })
|
|
66
|
+
lambda { middleware.call(build_env) }.should.not.raise
|
|
67
|
+
end
|
|
68
|
+
end
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
3
6
|
module Brute
|
|
4
7
|
module Middleware
|
|
5
8
|
module OTel
|
|
@@ -15,7 +18,7 @@ module Brute
|
|
|
15
18
|
|
|
16
19
|
span = env[:span]
|
|
17
20
|
if span
|
|
18
|
-
functions = env[:
|
|
21
|
+
functions = env[:pending_functions]
|
|
19
22
|
if functions && !functions.empty?
|
|
20
23
|
span.set_attribute("brute.tool_calls.count", functions.size)
|
|
21
24
|
|
|
@@ -37,3 +40,29 @@ module Brute
|
|
|
37
40
|
end
|
|
38
41
|
end
|
|
39
42
|
end
|
|
43
|
+
|
|
44
|
+
test do
|
|
45
|
+
require_relative "../../../../spec/support/mock_provider"
|
|
46
|
+
require_relative "../../../../spec/support/mock_response"
|
|
47
|
+
|
|
48
|
+
def build_env(**overrides)
|
|
49
|
+
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
50
|
+
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
51
|
+
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
it "passes the response through unchanged" do
|
|
55
|
+
response = MockResponse.new(content: "here's my plan")
|
|
56
|
+
middleware = Brute::Middleware::OTel::ToolCalls.new(->(_env) { response })
|
|
57
|
+
result = middleware.call(build_env)
|
|
58
|
+
result.should == response
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
it "passes through without error when span is nil with pending functions" do
|
|
62
|
+
response = MockResponse.new(content: "here's my plan")
|
|
63
|
+
fn = Struct.new(:name, :id, :arguments, keyword_init: true).new(name: "fs_read", id: "tc_001", arguments: { "path" => "/tmp" })
|
|
64
|
+
middleware = Brute::Middleware::OTel::ToolCalls.new(->(_env) { response })
|
|
65
|
+
result = middleware.call(build_env(pending_functions: [fn]))
|
|
66
|
+
result.should == response
|
|
67
|
+
end
|
|
68
|
+
end
|
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "brute"
|
|
5
|
+
|
|
3
6
|
module Brute
|
|
4
7
|
module Middleware
|
|
5
8
|
module OTel
|
|
6
9
|
# Records tool results being sent back to the LLM as span events.
|
|
7
10
|
#
|
|
8
|
-
# Runs PRE-call: when env[:tool_results] is present, the
|
|
11
|
+
# Runs PRE-call: when env[:tool_results] is present, the agent loop
|
|
9
12
|
# is sending tool execution results back to the LLM. Each result gets
|
|
10
13
|
# a span event with the tool name and success/error status.
|
|
11
14
|
#
|
|
@@ -35,3 +38,28 @@ module Brute
|
|
|
35
38
|
end
|
|
36
39
|
end
|
|
37
40
|
end
|
|
41
|
+
|
|
42
|
+
test do
|
|
43
|
+
require_relative "../../../../spec/support/mock_provider"
|
|
44
|
+
require_relative "../../../../spec/support/mock_response"
|
|
45
|
+
|
|
46
|
+
def build_env(**overrides)
|
|
47
|
+
{ provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
|
|
48
|
+
messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
|
|
49
|
+
tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
it "passes the response through unchanged" do
|
|
53
|
+
response = MockResponse.new(content: "processed")
|
|
54
|
+
middleware = Brute::Middleware::OTel::ToolResults.new(->(_env) { response })
|
|
55
|
+
result = middleware.call(build_env)
|
|
56
|
+
result.should == response
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
it "passes through without error when span is nil" do
|
|
60
|
+
response = MockResponse.new(content: "processed")
|
|
61
|
+
middleware = Brute::Middleware::OTel::ToolResults.new(->(_env) { response })
|
|
62
|
+
result = middleware.call(build_env(tool_results: [["fs_read", { content: "data" }]]))
|
|
63
|
+
result.should == response
|
|
64
|
+
end
|
|
65
|
+
end
|