brute 0.4.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/lib/brute/agent.rb +14 -0
  3. data/lib/brute/diff.rb +24 -0
  4. data/lib/brute/loop/agent_stream.rb +118 -0
  5. data/lib/brute/loop/agent_turn.rb +520 -0
  6. data/lib/brute/{compactor.rb → loop/compactor.rb} +2 -0
  7. data/lib/brute/{doom_loop.rb → loop/doom_loop.rb} +2 -0
  8. data/lib/brute/loop/step.rb +332 -0
  9. data/lib/brute/loop/tool_call_step.rb +90 -0
  10. data/lib/brute/middleware/compaction_check.rb +70 -23
  11. data/lib/brute/middleware/doom_loop_detection.rb +110 -7
  12. data/lib/brute/middleware/llm_call.rb +88 -1
  13. data/lib/brute/middleware/message_tracking.rb +140 -10
  14. data/lib/brute/middleware/otel/span.rb +32 -2
  15. data/lib/brute/middleware/otel/token_usage.rb +38 -0
  16. data/lib/brute/middleware/otel/tool_calls.rb +30 -1
  17. data/lib/brute/middleware/otel/tool_results.rb +29 -1
  18. data/lib/brute/middleware/otel.rb +5 -0
  19. data/lib/brute/middleware/reasoning_normalizer.rb +94 -0
  20. data/lib/brute/middleware/retry.rb +113 -1
  21. data/lib/brute/middleware/session_persistence.rb +46 -3
  22. data/lib/brute/middleware/token_tracking.rb +78 -0
  23. data/lib/brute/middleware/tool_error_tracking.rb +128 -1
  24. data/lib/brute/middleware/tool_use_guard.rb +64 -28
  25. data/lib/brute/middleware/tracing.rb +63 -2
  26. data/lib/brute/middleware.rb +18 -0
  27. data/lib/brute/orchestrator/turn.rb +105 -0
  28. data/lib/brute/patches/buffer_nil_guard.rb +5 -0
  29. data/lib/brute/pipeline.rb +86 -7
  30. data/lib/brute/prompts/build_switch.rb +29 -0
  31. data/lib/brute/prompts/environment.rb +43 -0
  32. data/lib/brute/prompts/identity.rb +29 -0
  33. data/lib/brute/prompts/instructions.rb +21 -0
  34. data/lib/brute/prompts/max_steps.rb +25 -0
  35. data/lib/brute/prompts/plan_reminder.rb +25 -0
  36. data/lib/brute/prompts/skills.rb +13 -0
  37. data/lib/brute/prompts.rb +28 -0
  38. data/lib/brute/providers/ollama.rb +135 -0
  39. data/lib/brute/providers/opencode_go.rb +5 -0
  40. data/lib/brute/providers/opencode_zen.rb +7 -2
  41. data/lib/brute/providers/shell.rb +2 -2
  42. data/lib/brute/providers/shell_response.rb +7 -2
  43. data/lib/brute/providers.rb +62 -0
  44. data/lib/brute/queue/base_queue.rb +222 -0
  45. data/lib/brute/{file_mutation_queue.rb → queue/file_mutation_queue.rb} +28 -26
  46. data/lib/brute/queue/parallel_queue.rb +66 -0
  47. data/lib/brute/queue/sequential_queue.rb +63 -0
  48. data/lib/brute/{message_store.rb → store/message_store.rb} +155 -62
  49. data/lib/brute/store/session.rb +106 -0
  50. data/lib/brute/{snapshot_store.rb → store/snapshot_store.rb} +2 -0
  51. data/lib/brute/{todo_store.rb → store/todo_store.rb} +2 -0
  52. data/lib/brute/system_prompt.rb +101 -0
  53. data/lib/brute/tools/delegate.rb +59 -0
  54. data/lib/brute/tools/fs_patch.rb +54 -2
  55. data/lib/brute/tools/fs_read.rb +5 -0
  56. data/lib/brute/tools/fs_remove.rb +7 -2
  57. data/lib/brute/tools/fs_search.rb +5 -0
  58. data/lib/brute/tools/fs_undo.rb +7 -2
  59. data/lib/brute/tools/fs_write.rb +40 -2
  60. data/lib/brute/tools/net_fetch.rb +5 -0
  61. data/lib/brute/tools/question.rb +5 -0
  62. data/lib/brute/tools/shell.rb +5 -0
  63. data/lib/brute/tools/todo_read.rb +6 -1
  64. data/lib/brute/tools/todo_write.rb +6 -1
  65. data/lib/brute/tools.rb +31 -0
  66. data/lib/brute/version.rb +1 -1
  67. data/lib/brute.rb +40 -204
  68. metadata +31 -20
  69. data/lib/brute/agent_stream.rb +0 -63
  70. data/lib/brute/hooks.rb +0 -84
  71. data/lib/brute/orchestrator.rb +0 -391
  72. data/lib/brute/session.rb +0 -161
@@ -1,29 +1,39 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
4
+ require "brute"
5
+
3
6
  module Brute
4
7
  module Middleware
5
8
  # Detects when the agent is stuck repeating tool call patterns and injects
6
- # a corrective warning into the context before the next LLM call.
9
+ # a corrective warning into the message history before the next LLM call.
7
10
  #
8
11
  # Runs PRE-call: inspects the conversation history for repeating tool call
9
- # patterns. If detected, talks a warning message into the context so the
10
- # LLM sees it as input alongside the normal tool results.
12
+ # patterns. If detected, appends a warning message so the LLM sees it as
13
+ # input alongside the normal tool results.
11
14
  #
12
15
  class DoomLoopDetection < Base
13
16
  def initialize(app, threshold: 3)
14
17
  super(app)
15
- @detector = Brute::DoomLoopDetector.new(threshold: threshold)
18
+ @detector = Brute::Loop::DoomLoopDetector.new(threshold: threshold)
16
19
  end
17
20
 
18
21
  def call(env)
19
- ctx = env[:context]
20
- messages = ctx.messages.to_a
22
+ messages = env[:messages]
21
23
 
22
24
  if (reps = @detector.detect(messages))
23
25
  warning = @detector.warning_message(reps)
24
26
  # Inject the warning as a user message so the LLM sees it
25
- ctx.talk(warning)
27
+ env[:messages] << LLM::Message.new(:user, warning)
26
28
  env[:metadata][:doom_loop_detected] = reps
29
+
30
+ # Signal the agent loop to exit after this LLM call completes.
31
+ # First-writer-wins: don't overwrite if another middleware already set it.
32
+ env[:should_exit] ||= {
33
+ reason: "doom_loop_detected",
34
+ message: "Agent is stuck repeating the same tool calls (#{reps} repetitions).",
35
+ source: "DoomLoopDetection",
36
+ }
27
37
  end
28
38
 
29
39
  @app.call(env)
@@ -31,3 +41,96 @@ module Brute
31
41
  end
32
42
  end
33
43
  end
44
+
45
+ test do
46
+ require_relative "../../../spec/support/mock_provider"
47
+ require_relative "../../../spec/support/mock_response"
48
+
49
+ def build_env(**overrides)
50
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
51
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
52
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
53
+ end
54
+
55
+ FakeFunc = Struct.new(:name, :arguments, keyword_init: true)
56
+
57
+ def assistant_msg_with_functions(function_list)
58
+ msg = LLM::Message.new(:assistant, "tool msg", {})
59
+ msg.define_singleton_method(:functions) { function_list }
60
+ msg
61
+ end
62
+
63
+ it "passes through when no doom loop is detected" do
64
+ inner_app = ->(_env) { MockResponse.new(content: "loop check") }
65
+ middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
66
+ env = build_env
67
+ middleware.call(env)
68
+ env[:metadata][:doom_loop_detected].should.be.nil
69
+ end
70
+
71
+ it "detects consecutive identical tool calls" do
72
+ inner_app = ->(_env) { MockResponse.new(content: "loop check") }
73
+ fn = FakeFunc.new(name: "fs_read", arguments: '{"path":"x.rb"}')
74
+ messages = 4.times.map { assistant_msg_with_functions([fn]) }
75
+ middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
76
+ env = build_env(messages: messages)
77
+ middleware.call(env)
78
+ env[:metadata][:doom_loop_detected].should.not.be.nil
79
+ end
80
+
81
+ it "does not trigger below the threshold" do
82
+ inner_app = ->(_env) { MockResponse.new(content: "loop check") }
83
+ fn = FakeFunc.new(name: "fs_read", arguments: '{"path":"x.rb"}')
84
+ messages = 2.times.map { assistant_msg_with_functions([fn]) }
85
+ middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
86
+ env = build_env(messages: messages)
87
+ middleware.call(env)
88
+ env[:metadata][:doom_loop_detected].should.be.nil
89
+ end
90
+
91
+ it "sets should_exit reason when doom loop detected" do
92
+ inner_app = ->(_env) { MockResponse.new(content: "loop check") }
93
+ fn = FakeFunc.new(name: "fs_read", arguments: '{"path":"x.rb"}')
94
+ messages = 4.times.map { assistant_msg_with_functions([fn]) }
95
+ middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
96
+ env = build_env(messages: messages)
97
+ middleware.call(env)
98
+ env[:should_exit][:reason].should == "doom_loop_detected"
99
+ end
100
+
101
+ it "does not set should_exit when no loop detected" do
102
+ inner_app = ->(_env) { MockResponse.new(content: "loop check") }
103
+ middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
104
+ env = build_env
105
+ middleware.call(env)
106
+ env[:should_exit].should.be.nil
107
+ end
108
+
109
+ it "does not overwrite should_exit if already set" do
110
+ inner_app = ->(_env) { MockResponse.new(content: "loop check") }
111
+ fn = FakeFunc.new(name: "fs_read", arguments: '{"path":"x.rb"}')
112
+ messages = 4.times.map { assistant_msg_with_functions([fn]) }
113
+ middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
114
+ existing = { reason: "other", message: "earlier", source: "Other" }
115
+ env = build_env(messages: messages, should_exit: existing)
116
+ middleware.call(env)
117
+ env[:should_exit][:reason].should == "other"
118
+ end
119
+
120
+ it "appends a warning message when loop detected" do
121
+ inner_app = ->(_env) { MockResponse.new(content: "loop check") }
122
+ fn = FakeFunc.new(name: "fs_read", arguments: '{"path":"x.rb"}')
123
+ messages = 4.times.map { assistant_msg_with_functions([fn]) }
124
+ middleware = Brute::Middleware::DoomLoopDetection.new(inner_app, threshold: 3)
125
+ env = build_env(messages: messages)
126
+ original_count = env[:messages].size
127
+ middleware.call(env)
128
+ env[:messages].size.should == original_count + 1
129
+ end
130
+
131
+ it "generates warning message with repetition count" do
132
+ detector = Brute::Loop::DoomLoopDetector.new(threshold: 3)
133
+ msg = detector.warning_message(5)
134
+ msg.should =~ /5 times/
135
+ end
136
+ end
@@ -1,17 +1,35 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
4
+ require "brute"
5
+
3
6
  module Brute
4
7
  module Middleware
5
8
  # The terminal "app" in the pipeline — performs the actual LLM call.
6
9
  #
10
+ # Builds a fresh LLM::Context per call from env[:messages], makes the
11
+ # call, extracts new messages back into env[:messages], and stashes
12
+ # pending functions in env[:pending_functions].
13
+ #
7
14
  # When streaming, on_content fires incrementally via AgentStream.
8
15
  # When not streaming, fires on_content post-hoc with the full text.
9
16
  #
10
17
  class LLMCall
11
18
  def call(env)
12
- ctx = env[:context]
19
+ ctx = build_context(env)
20
+
21
+ # Load existing conversation history into the ephemeral context
22
+ ctx.messages.concat(env[:messages])
23
+
13
24
  response = ctx.talk(env[:input])
14
25
 
26
+ # Extract new messages appended by talk() and store them
27
+ new_messages = ctx.messages.to_a.drop(env[:messages].size)
28
+ env[:messages].concat(new_messages)
29
+
30
+ # Stash pending functions for the agent loop
31
+ env[:pending_functions] = ctx.functions.to_a
32
+
15
33
  # Only fire on_content post-hoc when NOT streaming
16
34
  # (streaming delivers chunks incrementally via AgentStream)
17
35
  unless env[:streaming]
@@ -26,6 +44,14 @@ module Brute
26
44
 
27
45
  private
28
46
 
47
+ def build_context(env)
48
+ params = {}
49
+ params[:tools] = env[:tools] if env[:tools]&.any?
50
+ params[:stream] = env[:stream] if env[:stream]
51
+ params[:model] = env[:model] if env[:model]
52
+ LLM::Context.new(env[:provider], **params)
53
+ end
54
+
29
55
  # Safely extract text content from an LLM response.
30
56
  # Returns nil when the response contains only tool calls (no assistant text),
31
57
  # which causes LLM::Contract::Completion#content to raise NoMethodError
@@ -39,3 +65,64 @@ module Brute
39
65
  end
40
66
  end
41
67
  end
68
+
69
+ test do
70
+ require_relative "../../../spec/support/mock_provider"
71
+ require_relative "../../../spec/support/mock_response"
72
+
73
+ def build_env(**overrides)
74
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
75
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
76
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
77
+ end
78
+
79
+ it "calls the provider and returns a response" do
80
+ provider = MockProvider.new
81
+ middleware = Brute::Middleware::LLMCall.new
82
+ env = build_env(provider: provider, input: "hello", streaming: false)
83
+ response = middleware.call(env)
84
+ response.should.not.be.nil
85
+ end
86
+
87
+ it "records a call on the provider" do
88
+ provider = MockProvider.new
89
+ middleware = Brute::Middleware::LLMCall.new
90
+ env = build_env(provider: provider, input: "hello", streaming: false)
91
+ middleware.call(env)
92
+ provider.calls.size.should == 1
93
+ end
94
+
95
+ it "appends new messages to env[:messages]" do
96
+ provider = MockProvider.new
97
+ middleware = Brute::Middleware::LLMCall.new
98
+ env = build_env(provider: provider, input: "hello", streaming: false)
99
+ middleware.call(env)
100
+ env[:messages].should.not.be.empty
101
+ end
102
+
103
+ it "populates env[:pending_functions] as an Array" do
104
+ provider = MockProvider.new
105
+ middleware = Brute::Middleware::LLMCall.new
106
+ env = build_env(provider: provider, input: "hello", streaming: false)
107
+ middleware.call(env)
108
+ env[:pending_functions].should.be.kind_of(Array)
109
+ end
110
+
111
+ it "does not fire on_content callback when streaming" do
112
+ provider = MockProvider.new
113
+ middleware = Brute::Middleware::LLMCall.new
114
+ called = false
115
+ env = build_env(provider: provider, input: "hi", streaming: true, callbacks: { on_content: ->(_) { called = true } })
116
+ middleware.call(env)
117
+ called.should.be.false
118
+ end
119
+
120
+ it "preserves existing messages across calls" do
121
+ provider = MockProvider.new
122
+ middleware = Brute::Middleware::LLMCall.new
123
+ existing = LLM::Message.new(:user, "previous")
124
+ env = build_env(provider: provider, input: "hello", streaming: false, messages: [existing])
125
+ middleware.call(env)
126
+ env[:messages].first.should == existing
127
+ end
128
+ end
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
4
+ require "brute"
5
+
3
6
  module Brute
4
7
  module Middleware
5
8
  # Records every LLM exchange into a MessageStore in the OpenCode
@@ -15,7 +18,7 @@ module Brute
15
18
  # corresponding tool parts to "completed" (or "error").
16
19
  #
17
20
  # The middleware also stores itself in env[:message_tracking] so the
18
- # orchestrator can access the current assistant message ID for callbacks.
21
+ # agent loop can access the current assistant message ID for callbacks.
19
22
  #
20
23
  class MessageTracking < Base
21
24
  attr_reader :store
@@ -111,8 +114,7 @@ module Brute
111
114
  end
112
115
 
113
116
  def record_tool_calls(env)
114
- ctx = env[:context]
115
- functions = ctx.functions
117
+ functions = env[:pending_functions]
116
118
  return if functions.nil? || functions.empty?
117
119
 
118
120
  functions.each do |fn|
@@ -165,14 +167,11 @@ module Brute
165
167
  # ── Helpers ────────────────────────────────────────────────────
166
168
 
167
169
  # Resolve the actual model used for the request.
168
- # Prefers the model set on the LLM::Context (which respects user overrides)
169
- # and falls back to the provider's default_model.
170
+ # Prefers env[:model] (set by AgentTurn) and falls back to the
171
+ # provider's default_model.
170
172
  def resolve_model_name(env)
171
- ctx = env[:context]
172
- if ctx && ctx.instance_variable_defined?(:@params)
173
- ctx_model = ctx.instance_variable_get(:@params)&.dig(:model)
174
- return ctx_model.to_s if ctx_model
175
- end
173
+ model = env[:model]
174
+ return model.to_s if model
176
175
 
177
176
  # Fall back to provider default
178
177
  env[:provider]&.respond_to?(:default_model) ? env[:provider].default_model.to_s : nil
@@ -207,3 +206,134 @@ module Brute
207
206
  end
208
207
  end
209
208
  end
209
+
210
+ test do
211
+ require_relative "../../../spec/support/mock_provider"
212
+ require_relative "../../../spec/support/mock_response"
213
+ require "tmpdir"
214
+ require "fileutils"
215
+
216
+ def build_env(**overrides)
217
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
218
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
219
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
220
+ end
221
+
222
+ def with_tracking
223
+ tmpdir = Dir.mktmpdir("brute_test_")
224
+ store = Brute::Store::MessageStore.new(session_id: "test-session", dir: tmpdir)
225
+ response = MockResponse.new(content: "Hello from the LLM")
226
+ inner_app = ->(_env) { response }
227
+ middleware = Brute::Middleware::MessageTracking.new(inner_app, store: store)
228
+ yield middleware, store, response
229
+ ensure
230
+ FileUtils.rm_rf(tmpdir)
231
+ end
232
+
233
+ it "records a user message on first call of a turn" do
234
+ with_tracking do |mw, store, _|
235
+ mw.call(build_env(input: "What is Ruby?", tool_results: nil))
236
+ user_msg = store.messages.find { |m| m[:info][:role] == "user" }
237
+ user_msg[:parts][0][:text].should == "What is Ruby?"
238
+ end
239
+ end
240
+
241
+ it "records only one user message per turn" do
242
+ with_tracking do |mw, store, _|
243
+ env = build_env(input: "Hello", tool_results: nil)
244
+ mw.call(env)
245
+ env[:tool_results] = [["read", "contents"]]
246
+ mw.call(env)
247
+ store.messages.select { |m| m[:info][:role] == "user" }.size.should == 1
248
+ end
249
+ end
250
+
251
+ it "records an assistant message after LLM call" do
252
+ with_tracking do |mw, store, _|
253
+ mw.call(build_env(input: "Hello", tool_results: nil))
254
+ asst = store.messages.find { |m| m[:info][:role] == "assistant" }
255
+ asst.should.not.be.nil
256
+ end
257
+ end
258
+
259
+ it "captures text content as a text part" do
260
+ with_tracking do |mw, store, _|
261
+ mw.call(build_env(input: "Hello", tool_results: nil))
262
+ asst = store.messages.find { |m| m[:info][:role] == "assistant" }
263
+ text_parts = asst[:parts].select { |p| p[:type] == "text" }
264
+ text_parts[0][:text].should == "Hello from the LLM"
265
+ end
266
+ end
267
+
268
+ it "captures token usage from response" do
269
+ with_tracking do |mw, store, _|
270
+ mw.call(build_env(input: "Hello", tool_results: nil))
271
+ asst = store.messages.find { |m| m[:info][:role] == "assistant" }
272
+ asst[:info][:tokens][:input].should == 100
273
+ end
274
+ end
275
+
276
+ it "records tool calls as tool parts in running state" do
277
+ with_tracking do |mw, store, _|
278
+ fn = Struct.new(:id, :name, :arguments, keyword_init: true).new(id: "call_001", name: "read", arguments: { file_path: "/test" })
279
+ mw.call(build_env(input: "Read the file", tool_results: nil, pending_functions: [fn]))
280
+ asst = store.messages.find { |m| m[:info][:role] == "assistant" }
281
+ tool_parts = asst[:parts].select { |p| p[:type] == "tool" }
282
+ tool_parts[0][:state][:status].should == "running"
283
+ end
284
+ end
285
+
286
+ it "updates tool parts when results arrive" do
287
+ with_tracking do |mw, store, _|
288
+ fn = Struct.new(:id, :name, :arguments, keyword_init: true).new(id: "call_001", name: "read", arguments: { file_path: "/test" })
289
+ env = build_env(input: "Read the file", tool_results: nil, pending_functions: [fn])
290
+ mw.call(env)
291
+ env[:pending_functions] = []
292
+ env[:tool_results] = [["read", "file contents here"]]
293
+ mw.call(env)
294
+ first_asst = store.messages.find { |m| m[:info][:role] == "assistant" }
295
+ tool_part = first_asst[:parts].find { |p| p[:type] == "tool" }
296
+ tool_part[:state][:status].should == "completed"
297
+ end
298
+ end
299
+
300
+ it "records provider default_model when no override" do
301
+ with_tracking do |mw, store, _|
302
+ mw.call(build_env(input: "Hello", tool_results: nil))
303
+ asst = store.messages.find { |m| m[:info][:role] == "assistant" }
304
+ asst[:info][:modelID].should == "mock-model"
305
+ end
306
+ end
307
+
308
+ it "records overridden model when env[:model] is set" do
309
+ with_tracking do |mw, store, _|
310
+ mw.call(build_env(input: "Hello", tool_results: nil, model: "custom-haiku"))
311
+ asst = store.messages.find { |m| m[:info][:role] == "assistant" }
312
+ asst[:info][:modelID].should == "custom-haiku"
313
+ end
314
+ end
315
+
316
+ it "stores itself in env[:message_tracking]" do
317
+ with_tracking do |mw, _, _|
318
+ env = build_env(input: "Hello", tool_results: nil)
319
+ mw.call(env)
320
+ env[:message_tracking].should == mw
321
+ end
322
+ end
323
+
324
+ it "returns the inner app response unchanged" do
325
+ with_tracking do |mw, _, response|
326
+ result = mw.call(build_env(input: "Hello", tool_results: nil))
327
+ result.should == response
328
+ end
329
+ end
330
+
331
+ it "adds a step-finish part to assistant messages" do
332
+ with_tracking do |mw, store, _|
333
+ mw.call(build_env(input: "Hello", tool_results: nil))
334
+ asst = store.messages.find { |m| m[:info][:role] == "assistant" }
335
+ step_finish = asst[:parts].find { |p| p[:type] == "step-finish" }
336
+ step_finish[:reason].should == "stop"
337
+ end
338
+ end
339
+ end
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
4
+ require "brute"
5
+
3
6
  module Brute
4
7
  module Middleware
5
8
  module OTel
@@ -23,13 +26,13 @@ module Brute
23
26
  return @app.call(env) unless defined?(::OpenTelemetry::SDK)
24
27
 
25
28
  provider_name = provider_type(env[:provider])
26
- model = begin; env[:context].model; rescue; nil; end
29
+ model = env[:model] || (env[:provider].default_model rescue nil)
27
30
  span_name = model ? "llm.call #{model}" : "llm.call"
28
31
 
29
32
  attributes = {
30
33
  "brute.provider" => provider_name,
31
34
  "brute.streaming" => !!env[:streaming],
32
- "brute.context_messages" => env[:context].messages.to_a.size,
35
+ "brute.context_messages" => env[:messages].size,
33
36
  }
34
37
  attributes["brute.model"] = model.to_s if model
35
38
  attributes["brute.session_id"] = env[:metadata][:session_id].to_s if env.dig(:metadata, :session_id)
@@ -73,3 +76,30 @@ module Brute
73
76
  end
74
77
  end
75
78
  end
79
+
80
+ test do
81
+ require_relative "../../../../spec/support/mock_provider"
82
+ require_relative "../../../../spec/support/mock_response"
83
+
84
+ def build_env(**overrides)
85
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
86
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
87
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
88
+ end
89
+
90
+ it "passes through when OpenTelemetry::SDK is not defined" do
91
+ response = MockResponse.new(content: "hello from LLM")
92
+ middleware = Brute::Middleware::OTel::Span.new(->(_env) { response })
93
+ env = build_env
94
+ result = middleware.call(env)
95
+ result.should == response
96
+ end
97
+
98
+ it "env[:span] is nil when OTel is not defined" do
99
+ response = MockResponse.new(content: "hello from LLM")
100
+ middleware = Brute::Middleware::OTel::Span.new(->(_env) { response })
101
+ env = build_env
102
+ middleware.call(env)
103
+ env[:span].should.be.nil
104
+ end
105
+ end
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
4
+ require "brute"
5
+
3
6
  module Brute
4
7
  module Middleware
5
8
  module OTel
@@ -28,3 +31,38 @@ module Brute
28
31
  end
29
32
  end
30
33
  end
34
+
35
+ test do
36
+ require_relative "../../../../spec/support/mock_provider"
37
+ require_relative "../../../../spec/support/mock_response"
38
+
39
+ def build_env(**overrides)
40
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
41
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
42
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
43
+ end
44
+
45
+ def make_response
46
+ MockResponse.new(content: "hello",
47
+ usage: LLM::Usage.new(input_tokens: 100, output_tokens: 50, reasoning_tokens: 10, total_tokens: 160))
48
+ end
49
+
50
+ it "passes the response through unchanged" do
51
+ response = make_response
52
+ middleware = Brute::Middleware::OTel::TokenUsage.new(->(_env) { response })
53
+ result = middleware.call(build_env)
54
+ result.should == response
55
+ end
56
+
57
+ it "passes through without error when span is nil" do
58
+ response = make_response
59
+ middleware = Brute::Middleware::OTel::TokenUsage.new(->(_env) { response })
60
+ lambda { middleware.call(build_env) }.should.not.raise
61
+ end
62
+
63
+ it "handles a response without usage gracefully" do
64
+ no_usage = Object.new
65
+ middleware = Brute::Middleware::OTel::TokenUsage.new(->(_env) { no_usage })
66
+ lambda { middleware.call(build_env) }.should.not.raise
67
+ end
68
+ end
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
4
+ require "brute"
5
+
3
6
  module Brute
4
7
  module Middleware
5
8
  module OTel
@@ -15,7 +18,7 @@ module Brute
15
18
 
16
19
  span = env[:span]
17
20
  if span
18
- functions = env[:context].functions
21
+ functions = env[:pending_functions]
19
22
  if functions && !functions.empty?
20
23
  span.set_attribute("brute.tool_calls.count", functions.size)
21
24
 
@@ -37,3 +40,29 @@ module Brute
37
40
  end
38
41
  end
39
42
  end
43
+
44
+ test do
45
+ require_relative "../../../../spec/support/mock_provider"
46
+ require_relative "../../../../spec/support/mock_response"
47
+
48
+ def build_env(**overrides)
49
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
50
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
51
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
52
+ end
53
+
54
+ it "passes the response through unchanged" do
55
+ response = MockResponse.new(content: "here's my plan")
56
+ middleware = Brute::Middleware::OTel::ToolCalls.new(->(_env) { response })
57
+ result = middleware.call(build_env)
58
+ result.should == response
59
+ end
60
+
61
+ it "passes through without error when span is nil with pending functions" do
62
+ response = MockResponse.new(content: "here's my plan")
63
+ fn = Struct.new(:name, :id, :arguments, keyword_init: true).new(name: "fs_read", id: "tc_001", arguments: { "path" => "/tmp" })
64
+ middleware = Brute::Middleware::OTel::ToolCalls.new(->(_env) { response })
65
+ result = middleware.call(build_env(pending_functions: [fn]))
66
+ result.should == response
67
+ end
68
+ end
@@ -1,11 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
4
+ require "brute"
5
+
3
6
  module Brute
4
7
  module Middleware
5
8
  module OTel
6
9
  # Records tool results being sent back to the LLM as span events.
7
10
  #
8
- # Runs PRE-call: when env[:tool_results] is present, the orchestrator
11
+ # Runs PRE-call: when env[:tool_results] is present, the agent loop
9
12
  # is sending tool execution results back to the LLM. Each result gets
10
13
  # a span event with the tool name and success/error status.
11
14
  #
@@ -35,3 +38,28 @@ module Brute
35
38
  end
36
39
  end
37
40
  end
41
+
42
+ test do
43
+ require_relative "../../../../spec/support/mock_provider"
44
+ require_relative "../../../../spec/support/mock_response"
45
+
46
+ def build_env(**overrides)
47
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
48
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
49
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
50
+ end
51
+
52
+ it "passes the response through unchanged" do
53
+ response = MockResponse.new(content: "processed")
54
+ middleware = Brute::Middleware::OTel::ToolResults.new(->(_env) { response })
55
+ result = middleware.call(build_env)
56
+ result.should == response
57
+ end
58
+
59
+ it "passes through without error when span is nil" do
60
+ response = MockResponse.new(content: "processed")
61
+ middleware = Brute::Middleware::OTel::ToolResults.new(->(_env) { response })
62
+ result = middleware.call(build_env(tool_results: [["fs_read", { content: "data" }]]))
63
+ result.should == response
64
+ end
65
+ end
@@ -1,5 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ if __FILE__ == $0
4
+ require "bundler/setup"
5
+ require "brute"
6
+ end
7
+
3
8
  module Brute
4
9
  module Middleware
5
10
  # OpenTelemetry instrumentation for the LLM pipeline.