brute 0.4.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. checksums.yaml +4 -4
  2. data/lib/brute/agent.rb +14 -0
  3. data/lib/brute/diff.rb +18 -28
  4. data/lib/brute/loop/agent_stream.rb +118 -0
  5. data/lib/brute/loop/agent_turn.rb +520 -0
  6. data/lib/brute/{compactor.rb → loop/compactor.rb} +2 -0
  7. data/lib/brute/{doom_loop.rb → loop/doom_loop.rb} +2 -0
  8. data/lib/brute/loop/step.rb +332 -0
  9. data/lib/brute/loop/tool_call_step.rb +90 -0
  10. data/lib/brute/middleware/compaction_check.rb +60 -146
  11. data/lib/brute/middleware/doom_loop_detection.rb +95 -92
  12. data/lib/brute/middleware/llm_call.rb +78 -80
  13. data/lib/brute/middleware/message_tracking.rb +115 -162
  14. data/lib/brute/middleware/otel/span.rb +25 -106
  15. data/lib/brute/middleware/otel/token_usage.rb +29 -84
  16. data/lib/brute/middleware/otel/tool_calls.rb +23 -107
  17. data/lib/brute/middleware/otel/tool_results.rb +22 -86
  18. data/lib/brute/middleware/reasoning_normalizer.rb +78 -103
  19. data/lib/brute/middleware/retry.rb +95 -76
  20. data/lib/brute/middleware/session_persistence.rb +38 -37
  21. data/lib/brute/middleware/token_tracking.rb +64 -63
  22. data/lib/brute/middleware/tool_error_tracking.rb +108 -82
  23. data/lib/brute/middleware/tool_use_guard.rb +57 -90
  24. data/lib/brute/middleware/tracing.rb +53 -63
  25. data/lib/brute/middleware.rb +18 -0
  26. data/lib/brute/orchestrator/turn.rb +105 -0
  27. data/lib/brute/pipeline.rb +77 -133
  28. data/lib/brute/prompts/build_switch.rb +21 -25
  29. data/lib/brute/prompts/environment.rb +31 -35
  30. data/lib/brute/prompts/identity.rb +22 -29
  31. data/lib/brute/prompts/instructions.rb +15 -18
  32. data/lib/brute/prompts/max_steps.rb +18 -25
  33. data/lib/brute/prompts/plan_reminder.rb +18 -26
  34. data/lib/brute/prompts/skills.rb +8 -30
  35. data/lib/brute/prompts.rb +28 -0
  36. data/lib/brute/providers/ollama.rb +135 -0
  37. data/lib/brute/providers/shell.rb +2 -2
  38. data/lib/brute/providers/shell_response.rb +2 -2
  39. data/lib/brute/providers.rb +62 -0
  40. data/lib/brute/queue/base_queue.rb +222 -0
  41. data/lib/brute/{file_mutation_queue.rb → queue/file_mutation_queue.rb} +28 -26
  42. data/lib/brute/queue/parallel_queue.rb +66 -0
  43. data/lib/brute/queue/sequential_queue.rb +63 -0
  44. data/lib/brute/store/message_store.rb +362 -0
  45. data/lib/brute/store/session.rb +106 -0
  46. data/lib/brute/{snapshot_store.rb → store/snapshot_store.rb} +2 -0
  47. data/lib/brute/{todo_store.rb → store/todo_store.rb} +2 -0
  48. data/lib/brute/system_prompt.rb +81 -194
  49. data/lib/brute/tools/delegate.rb +46 -116
  50. data/lib/brute/tools/fs_patch.rb +36 -37
  51. data/lib/brute/tools/fs_remove.rb +2 -2
  52. data/lib/brute/tools/fs_undo.rb +2 -2
  53. data/lib/brute/tools/fs_write.rb +29 -41
  54. data/lib/brute/tools/todo_read.rb +1 -1
  55. data/lib/brute/tools/todo_write.rb +1 -1
  56. data/lib/brute/tools.rb +31 -0
  57. data/lib/brute/version.rb +1 -1
  58. data/lib/brute.rb +40 -204
  59. metadata +31 -20
  60. data/lib/brute/agent_stream.rb +0 -181
  61. data/lib/brute/hooks.rb +0 -84
  62. data/lib/brute/message_store.rb +0 -463
  63. data/lib/brute/orchestrator.rb +0 -550
  64. data/lib/brute/session.rb +0 -161
@@ -1,9 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- if __FILE__ == $0
4
- require "bundler/setup"
5
- require "brute"
6
- end
3
+ require "bundler/setup"
4
+ require "brute"
7
5
 
8
6
  module Brute
9
7
  module Middleware
@@ -15,7 +13,7 @@ module Brute
15
13
  # and counts failures and totals.
16
14
  #
17
15
  # When any tool reaches max_failures, it sets env[:metadata][:tool_error_limit_reached]
18
- # so the orchestrator can decide to stop.
16
+ # so the agent loop can decide to stop.
19
17
  #
20
18
  # Also stores env[:metadata][:tool_calls] with the cumulative number of
21
19
  # tool invocations in the current session.
@@ -44,6 +42,15 @@ module Brute
44
42
  env[:metadata][:tool_errors] = @errors.dup
45
43
  env[:metadata][:tool_error_limit_reached] = @errors.any? { |_, c| c >= @max_failures }
46
44
 
45
+ if env[:metadata][:tool_error_limit_reached]
46
+ failed_tool, fail_count = @errors.max_by { |_, c| c }
47
+ env[:should_exit] ||= {
48
+ reason: "tool_error_limit_reached",
49
+ message: "Tool '#{failed_tool}' has failed #{fail_count} times (limit: #{@max_failures}). Stopping.",
50
+ source: "ToolErrorTracking",
51
+ }
52
+ end
53
+
47
54
  @app.call(env)
48
55
  end
49
56
 
@@ -56,98 +63,117 @@ module Brute
56
63
  end
57
64
  end
58
65
 
59
- if __FILE__ == $0
60
- require_relative "../../../spec/spec_helper"
61
-
62
- RSpec.describe Brute::Middleware::ToolErrorTracking do
63
- let(:response) { MockResponse.new(content: "tracked") }
64
- let(:inner_app) { ->(_env) { response } }
65
- let(:middleware) { described_class.new(inner_app, max_failures: 3) }
66
+ test do
67
+ require_relative "../../../spec/support/mock_provider"
68
+ require_relative "../../../spec/support/mock_response"
66
69
 
67
- it "passes the response through" do
68
- env = build_env
69
- result = middleware.call(env)
70
- expect(result).to eq(response)
71
- end
72
-
73
- it "reports zero tool calls when tool_results is nil" do
74
- env = build_env(tool_results: nil)
75
- middleware.call(env)
76
-
77
- expect(env[:metadata][:tool_calls]).to eq(0)
78
- expect(env[:metadata][:tool_errors]).to eq({})
79
- expect(env[:metadata][:tool_error_limit_reached]).to be false
80
- end
70
+ def build_env(**overrides)
71
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
72
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
73
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
74
+ end
81
75
 
82
- it "counts total tool calls from tool_results" do
83
- results = [
84
- ["fs_read", { content: "data" }],
85
- ["shell", { output: "ok" }],
86
- ["fs_write", { success: true }],
87
- ]
88
- env = build_env(tool_results: results)
89
- middleware.call(env)
76
+ def make_middleware(app = nil)
77
+ app ||= ->(_env) { MockResponse.new(content: "tracked") }
78
+ Brute::Middleware::ToolErrorTracking.new(app, max_failures: 3)
79
+ end
90
80
 
91
- expect(env[:metadata][:tool_calls]).to eq(3)
92
- end
81
+ it "passes the response through" do
82
+ response = MockResponse.new(content: "tracked")
83
+ app = ->(_env) { response }
84
+ result = make_middleware(app).call(build_env)
85
+ result.should == response
86
+ end
93
87
 
94
- it "counts per-tool errors from results with error key" do
95
- results = [
96
- ["fs_read", { error: "not found" }],
97
- ["fs_read", { error: "permission denied" }],
98
- ["shell", { output: "ok" }],
99
- ]
100
- env = build_env(tool_results: results)
101
- middleware.call(env)
88
+ it "reports zero tool calls when tool_results is nil" do
89
+ env = build_env(tool_results: nil)
90
+ make_middleware.call(env)
91
+ env[:metadata][:tool_calls].should == 0
92
+ end
102
93
 
103
- expect(env[:metadata][:tool_errors]).to eq({ "fs_read" => 2 })
104
- end
94
+ it "reports empty tool errors when tool_results is nil" do
95
+ env = build_env(tool_results: nil)
96
+ make_middleware.call(env)
97
+ env[:metadata][:tool_errors].should == {}
98
+ end
105
99
 
106
- it "sets tool_error_limit_reached when a tool hits max_failures" do
107
- results = [
108
- ["fs_read", { error: "fail 1" }],
109
- ["fs_read", { error: "fail 2" }],
110
- ["fs_read", { error: "fail 3" }],
111
- ]
112
- env = build_env(tool_results: results)
113
- middleware.call(env)
100
+ it "does not flag limit reached when tool_results is nil" do
101
+ env = build_env(tool_results: nil)
102
+ make_middleware.call(env)
103
+ env[:metadata][:tool_error_limit_reached].should.be.false
104
+ end
114
105
 
115
- expect(env[:metadata][:tool_error_limit_reached]).to be true
116
- end
106
+ it "counts total tool calls from tool_results" do
107
+ results = [["fs_read", { content: "data" }], ["shell", { output: "ok" }], ["fs_write", { success: true }]]
108
+ env = build_env(tool_results: results)
109
+ make_middleware.call(env)
110
+ env[:metadata][:tool_calls].should == 3
111
+ end
117
112
 
118
- it "does not flag below the threshold" do
119
- results = [
120
- ["fs_read", { error: "fail 1" }],
121
- ["fs_read", { error: "fail 2" }],
122
- ]
123
- env = build_env(tool_results: results)
124
- middleware.call(env)
113
+ it "counts per-tool errors from results with error key" do
114
+ results = [["fs_read", { error: "not found" }], ["fs_read", { error: "denied" }], ["shell", { output: "ok" }]]
115
+ env = build_env(tool_results: results)
116
+ make_middleware.call(env)
117
+ env[:metadata][:tool_errors].should == { "fs_read" => 2 }
118
+ end
125
119
 
126
- expect(env[:metadata][:tool_error_limit_reached]).to be false
127
- end
120
+ it "sets tool_error_limit_reached when a tool hits max_failures" do
121
+ results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }], ["fs_read", { error: "3" }]]
122
+ env = build_env(tool_results: results)
123
+ make_middleware.call(env)
124
+ env[:metadata][:tool_error_limit_reached].should.be.true
125
+ end
128
126
 
129
- it "accumulates counts across multiple calls" do
130
- env1 = build_env(tool_results: [["fs_read", { error: "fail" }]])
131
- middleware.call(env1)
127
+ it "does not flag below the threshold" do
128
+ results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }]]
129
+ env = build_env(tool_results: results)
130
+ make_middleware.call(env)
131
+ env[:metadata][:tool_error_limit_reached].should.be.false
132
+ end
132
133
 
133
- env2 = build_env(tool_results: [["fs_read", { error: "fail again" }], ["shell", { output: "ok" }]])
134
- middleware.call(env2)
134
+ it "accumulates counts across multiple calls" do
135
+ mw = make_middleware
136
+ mw.call(build_env(tool_results: [["fs_read", { error: "fail" }]]))
137
+ env2 = build_env(tool_results: [["fs_read", { error: "again" }], ["shell", { output: "ok" }]])
138
+ mw.call(env2)
139
+ env2[:metadata][:tool_calls].should == 3
140
+ end
135
141
 
136
- expect(env2[:metadata][:tool_calls]).to eq(3) # 1 + 2
137
- expect(env2[:metadata][:tool_errors]).to eq({ "fs_read" => 2 })
138
- end
142
+ it "clears counters on reset!" do
143
+ mw = make_middleware
144
+ mw.call(build_env(tool_results: [["fs_read", { error: "fail" }]]))
145
+ mw.reset!
146
+ env2 = build_env(tool_results: nil)
147
+ mw.call(env2)
148
+ env2[:metadata][:tool_calls].should == 0
149
+ end
139
150
 
140
- it "clears counters on reset!" do
141
- env = build_env(tool_results: [["fs_read", { error: "fail" }]])
142
- middleware.call(env)
151
+ it "sets should_exit reason when error limit reached" do
152
+ results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }], ["fs_read", { error: "3" }]]
153
+ env = build_env(tool_results: results)
154
+ make_middleware.call(env)
155
+ env[:should_exit][:reason].should == "tool_error_limit_reached"
156
+ end
143
157
 
144
- middleware.reset!
158
+ it "sets should_exit source to ToolErrorTracking" do
159
+ results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }], ["fs_read", { error: "3" }]]
160
+ env = build_env(tool_results: results)
161
+ make_middleware.call(env)
162
+ env[:should_exit][:source].should == "ToolErrorTracking"
163
+ end
145
164
 
146
- env2 = build_env(tool_results: nil)
147
- middleware.call(env2)
165
+ it "does not set should_exit below the threshold" do
166
+ results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }]]
167
+ env = build_env(tool_results: results)
168
+ make_middleware.call(env)
169
+ env[:should_exit].should.be.nil
170
+ end
148
171
 
149
- expect(env2[:metadata][:tool_calls]).to eq(0)
150
- expect(env2[:metadata][:tool_errors]).to eq({})
151
- end
172
+ it "does not overwrite should_exit if already set" do
173
+ results = [["fs_read", { error: "1" }], ["fs_read", { error: "2" }], ["fs_read", { error: "3" }]]
174
+ existing = { reason: "doom_loop_detected", message: "loop", source: "DoomLoopDetection" }
175
+ env = build_env(tool_results: results, should_exit: existing)
176
+ make_middleware.call(env)
177
+ env[:should_exit][:reason].should == "doom_loop_detected"
152
178
  end
153
179
  end
@@ -1,24 +1,27 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
4
+ require "brute"
5
+
3
6
  module Brute
4
7
  module Middleware
5
8
  # Guards against tool-only LLM responses where the assistant message
6
9
  # is dropped from the context buffer.
7
10
  #
8
11
  # When the LLM responds with only tool_use blocks (no text), llm.rb's
9
- # response adapter produces empty choices. Context#talk appends nil,
10
- # BufferNilGuard strips it, and the assistant message carrying tool_use
11
- # blocks is lost. This causes "unexpected tool_use_id" on the next call
12
- # because tool_result references a tool_use that's missing from the buffer.
12
+ # response adapter produces empty choices. The assistant message carrying
13
+ # tool_use blocks may be lost. This causes "unexpected tool_use_id" on
14
+ # the next call because tool_result references a tool_use that's missing
15
+ # from the message history.
13
16
  #
14
17
  # This middleware runs post-call and ensures every pending tool_use ID
15
- # is covered by an assistant message in the buffer. It handles three
18
+ # is covered by an assistant message in env[:messages]. It handles three
16
19
  # cases:
17
20
  #
18
- # 1. ctx.functions is non-empty and the assistant message exists → no-op
19
- # 2. ctx.functions is non-empty but the assistant message is missing
21
+ # 1. pending_functions is non-empty and the assistant message exists → no-op
22
+ # 2. pending_functions is non-empty but the assistant message is missing
20
23
  # (or has different IDs) → inject synthetic message
21
- # 3. ctx.functions is empty (nil-choice bug) but the stream recorded
24
+ # 3. pending_functions is empty (nil-choice bug) but the stream recorded
22
25
  # tool calls → inject synthetic message using stream metadata
23
26
  #
24
27
  class ToolUseGuard
@@ -29,32 +32,30 @@ module Brute
29
32
  def call(env)
30
33
  response = @app.call(env)
31
34
 
32
- ctx = env[:context]
33
-
34
- # Collect pending tool data from ctx.functions (primary) or the
35
- # stream's recorded metadata (fallback for nil-choice bug).
36
- tool_data = collect_tool_data(ctx, env)
35
+ # Collect pending tool data from env[:pending_functions] (primary)
36
+ # or the stream's recorded metadata (fallback for nil-choice bug).
37
+ tool_data = collect_tool_data(env)
37
38
  return response if tool_data.empty?
38
39
 
39
40
  # Find all tool_use IDs already covered by assistant messages.
40
- covered_ids = covered_tool_ids(ctx)
41
+ covered_ids = covered_tool_ids(env[:messages])
41
42
 
42
43
  # Inject a synthetic assistant message for any uncovered tool calls.
43
44
  uncovered = tool_data.reject { |td| covered_ids.include?(td[:id]) }
44
- inject_synthetic!(ctx, uncovered) unless uncovered.empty?
45
+ inject_synthetic!(env[:messages], uncovered) unless uncovered.empty?
45
46
 
46
47
  response
47
48
  end
48
49
 
49
50
  private
50
51
 
51
- def collect_tool_data(ctx, env)
52
- functions = ctx.functions
52
+ def collect_tool_data(env)
53
+ functions = env[:pending_functions]
53
54
  if functions && !functions.empty?
54
55
  functions.map { |fn| { id: fn.id, name: fn.name, arguments: fn.arguments } }
55
56
  elsif env[:streaming]
56
- stream = resolve_stream(ctx)
57
- if stream
57
+ stream = env[:stream]
58
+ if stream&.respond_to?(:pending_tool_calls)
58
59
  data = stream.pending_tool_calls.dup
59
60
  stream.clear_pending_tool_calls!
60
61
  data
@@ -66,19 +67,14 @@ module Brute
66
67
  end
67
68
  end
68
69
 
69
- def resolve_stream(ctx)
70
- stream = ctx.instance_variable_get(:@params)&.dig(:stream)
71
- stream if stream.respond_to?(:pending_tool_calls)
72
- end
73
-
74
- def covered_tool_ids(ctx)
75
- ctx.messages.to_a
70
+ def covered_tool_ids(messages)
71
+ messages
76
72
  .select { |m| m.role.to_s == "assistant" && m.tool_call? }
77
73
  .flat_map { |m| (m.extra.original_tool_calls || []).map { |tc| tc["id"] } }
78
74
  .to_set
79
75
  end
80
76
 
81
- def inject_synthetic!(ctx, uncovered)
77
+ def inject_synthetic!(messages, uncovered)
82
78
  tool_calls = uncovered.map do |td|
83
79
  LLM::Object.from(id: td[:id], name: td[:name], arguments: td[:arguments])
84
80
  end
@@ -90,77 +86,48 @@ module Brute
90
86
  tool_calls: tool_calls,
91
87
  original_tool_calls: original_tool_calls,
92
88
  })
93
- ctx.messages.concat([synthetic])
89
+ messages << synthetic
94
90
  end
95
91
  end
96
92
  end
97
93
  end
98
94
 
99
- if __FILE__ == $0
100
- require_relative "../../../spec/spec_helper"
101
-
102
- RSpec.describe Brute::Middleware::ToolUseGuard do
103
- let(:provider) { MockProvider.new }
104
-
105
- # Helper: build a response that produces pending tool calls (functions) in the context.
106
- def make_tool_response(tool_calls:)
107
- MockResponse.new(content: "", tool_calls: tool_calls)
108
- end
109
-
110
- it "passes the response through when there are no pending functions" do
111
- response = MockResponse.new(content: "no tools")
112
- allow(provider).to receive(:complete).and_return(response)
113
-
114
- ctx = LLM::Context.new(provider, tools: [])
115
- prompt = ctx.prompt { |p| p.system("sys"); p.user("hi") }
116
-
117
- inner_app = ->(_env) { ctx.talk(prompt); response }
118
- middleware = described_class.new(inner_app)
119
- env = build_env(context: ctx, provider: provider)
120
-
121
- result = middleware.call(env)
122
- expect(result).to eq(response)
123
- end
124
-
125
- it "does not inject a synthetic message when the assistant message already has tool_call?" do
126
- tool_calls = [{ id: "toolu_1", name: "fs_read", arguments: { "path" => "test.rb" } }]
127
- response = make_tool_response(tool_calls: tool_calls)
128
- allow(provider).to receive(:complete).and_return(response)
95
+ test do
96
+ require_relative "../../../spec/support/mock_provider"
97
+ require_relative "../../../spec/support/mock_response"
129
98
 
130
- ctx = LLM::Context.new(provider, tools: [])
131
- prompt = ctx.prompt { |p| p.system("sys"); p.user("read it") }
132
-
133
- inner_app = ->(_env) { ctx.talk(prompt); response }
134
- middleware = described_class.new(inner_app)
135
- env = build_env(context: ctx, provider: provider)
136
-
137
- middleware.call(env)
138
-
139
- messages = ctx.messages.to_a
140
- assistant_msgs = messages.select { |m| m.role.to_s == "assistant" }
141
- # Should only have the original assistant message, no synthetic
142
- expect(assistant_msgs.size).to eq(1)
143
- end
144
-
145
- it "injects a synthetic assistant message when tool calls exist but assistant is missing" do
146
- tool_calls = [{ id: "toolu_1", name: "fs_read", arguments: { "path" => "test.rb" } }]
147
- response = MockResponse.new(content: "")
148
- # Simulate the bug: choices[-1] is nil, so no assistant message stored
149
- allow(response).to receive(:choices).and_return([nil])
150
- allow(provider).to receive(:complete).and_return(response)
151
-
152
- ctx = LLM::Context.new(provider, tools: [])
153
- prompt = ctx.prompt { |p| p.system("sys"); p.user("read it") }
99
+ def build_env(**overrides)
100
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
101
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
102
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
103
+ end
154
104
 
155
- inner_app = ->(_env) do
156
- ctx.talk(prompt)
157
- response
158
- end
105
+ it "passes the response through when there are no pending functions" do
106
+ response = MockResponse.new(content: "no tools")
107
+ inner_app = ->(_env) { response }
108
+ middleware = Brute::Middleware::ToolUseGuard.new(inner_app)
109
+ result = middleware.call(build_env(pending_functions: []))
110
+ result.should == response
111
+ end
159
112
 
160
- middleware = described_class.new(inner_app)
161
- env = build_env(context: ctx, provider: provider)
113
+ it "injects a synthetic assistant message when tool calls exist but assistant is missing" do
114
+ fn = Struct.new(:id, :name, :arguments, keyword_init: true)
115
+ .new(id: "toolu_1", name: "fs_read", arguments: { "path" => "test.rb" })
116
+ response = MockResponse.new(content: "")
117
+ inner_app = ->(_env) { response }
118
+ middleware = Brute::Middleware::ToolUseGuard.new(inner_app)
119
+ env = build_env(messages: [], pending_functions: [fn])
120
+ lambda { middleware.call(env) }.should.not.raise
121
+ end
162
122
 
163
- expect { middleware.call(env) }.not_to raise_error
164
- end
123
+ it "creates one assistant message for uncovered tool calls" do
124
+ fn = Struct.new(:id, :name, :arguments, keyword_init: true)
125
+ .new(id: "toolu_1", name: "fs_read", arguments: { "path" => "test.rb" })
126
+ response = MockResponse.new(content: "")
127
+ inner_app = ->(_env) { response }
128
+ middleware = Brute::Middleware::ToolUseGuard.new(inner_app)
129
+ env = build_env(messages: [], pending_functions: [fn])
130
+ middleware.call(env)
131
+ env[:messages].select { |m| m.role.to_s == "assistant" }.size.should == 1
165
132
  end
166
133
  end
@@ -1,9 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- if __FILE__ == $0
4
- require "bundler/setup"
5
- require "brute"
6
- end
3
+ require "bundler/setup"
4
+ require "brute"
7
5
 
8
6
  module Brute
9
7
  module Middleware
@@ -14,7 +12,7 @@ module Brute
14
12
  # call. It also tracks total wall-clock time across all calls in a turn
15
13
  # (including tool execution gaps between LLM calls).
16
14
  #
17
- # A new turn is detected when env[:tool_results] is nil (the orchestrator
15
+ # A new turn is detected when env[:tool_results] is nil (the agent loop
18
16
  # sets this on the first call of each run()).
19
17
  #
20
18
  # Stores in env[:metadata][:timing]:
@@ -41,7 +39,7 @@ module Brute
41
39
  @total_llm_elapsed = 0.0
42
40
  end
43
41
 
44
- messages = env[:context].messages.to_a
42
+ messages = env[:messages]
45
43
  @logger.debug("[brute] LLM call ##{@call_count} (#{messages.size} messages in context)")
46
44
 
47
45
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
@@ -67,68 +65,60 @@ module Brute
67
65
  end
68
66
  end
69
67
 
70
- if __FILE__ == $0
71
- require_relative "../../../spec/spec_helper"
72
-
73
- RSpec.describe Brute::Middleware::Tracing do
74
- let(:response) { MockResponse.new(content: "traced response") }
75
- let(:inner_app) { ->(_env) { response } }
76
- let(:log_output) { StringIO.new }
77
- let(:logger) { Logger.new(log_output) }
78
- let(:middleware) { described_class.new(inner_app, logger: logger) }
79
-
80
- it "passes the response through unchanged" do
81
- env = build_env(tool_results: nil)
82
- result = middleware.call(env)
83
- expect(result).to eq(response)
84
- end
85
-
86
- it "populates env[:metadata][:timing] with all required keys" do
87
- env = build_env(tool_results: nil)
88
- middleware.call(env)
89
-
90
- timing = env[:metadata][:timing]
91
- expect(timing).to include(
92
- :total_elapsed,
93
- :total_llm_elapsed,
94
- :llm_call_count,
95
- :last_call_elapsed
96
- )
97
- expect(timing[:llm_call_count]).to eq(1)
98
- expect(timing[:last_call_elapsed]).to be >= 0
99
- expect(timing[:total_llm_elapsed]).to be >= 0
100
- end
101
-
102
- it "resets turn timing when tool_results is nil (new turn)" do
103
- env = build_env(tool_results: nil)
104
- middleware.call(env)
105
- first_elapsed = env[:metadata][:timing][:total_llm_elapsed]
68
+ test do
69
+ require_relative "../../../spec/support/mock_provider"
70
+ require_relative "../../../spec/support/mock_response"
106
71
 
107
- # Simulate continuation within the same turn (tool_results present)
108
- env[:tool_results] = [["read", { content: "file data" }]]
109
- middleware.call(env)
72
+ def build_env(**overrides)
73
+ { provider: MockProvider.new, model: nil, input: "test prompt", tools: [],
74
+ messages: [], stream: nil, params: {}, metadata: {}, callbacks: {},
75
+ tool_results: nil, streaming: false, should_exit: nil, pending_functions: [] }.merge(overrides)
76
+ end
110
77
 
111
- expect(env[:metadata][:timing][:llm_call_count]).to eq(2)
112
- expect(env[:metadata][:timing][:total_llm_elapsed]).to be >= first_elapsed
113
- end
78
+ it "passes the response through unchanged" do
79
+ response = MockResponse.new(content: "traced response")
80
+ inner_app = ->(_env) { response }
81
+ middleware = Brute::Middleware::Tracing.new(inner_app, logger: Logger.new(StringIO.new))
82
+ result = middleware.call(build_env(tool_results: nil))
83
+ result.should == response
84
+ end
114
85
 
115
- it "accumulates call count across multiple calls" do
116
- env = build_env(tool_results: nil)
117
- middleware.call(env)
118
- env[:tool_results] = [["read", {}]]
119
- middleware.call(env)
120
- middleware.call(env)
86
+ it "populates timing with llm_call_count" do
87
+ response = MockResponse.new(content: "traced response")
88
+ inner_app = ->(_env) { response }
89
+ middleware = Brute::Middleware::Tracing.new(inner_app, logger: Logger.new(StringIO.new))
90
+ env = build_env(tool_results: nil)
91
+ middleware.call(env)
92
+ env[:metadata][:timing][:llm_call_count].should == 1
93
+ end
121
94
 
122
- expect(env[:metadata][:timing][:llm_call_count]).to eq(3)
123
- end
95
+ it "populates timing with non-negative last_call_elapsed" do
96
+ response = MockResponse.new(content: "traced response")
97
+ inner_app = ->(_env) { response }
98
+ middleware = Brute::Middleware::Tracing.new(inner_app, logger: Logger.new(StringIO.new))
99
+ env = build_env(tool_results: nil)
100
+ middleware.call(env)
101
+ (env[:metadata][:timing][:last_call_elapsed] >= 0).should.be.true
102
+ end
124
103
 
125
- it "logs debug and info messages" do
126
- env = build_env(tool_results: nil)
127
- middleware.call(env)
104
+ it "accumulates call count across multiple calls" do
105
+ response = MockResponse.new(content: "traced response")
106
+ inner_app = ->(_env) { response }
107
+ middleware = Brute::Middleware::Tracing.new(inner_app, logger: Logger.new(StringIO.new))
108
+ env = build_env(tool_results: nil)
109
+ middleware.call(env)
110
+ env[:tool_results] = [["read", {}]]
111
+ middleware.call(env)
112
+ middleware.call(env)
113
+ env[:metadata][:timing][:llm_call_count].should == 3
114
+ end
128
115
 
129
- log_text = log_output.string
130
- expect(log_text).to include("LLM call #1")
131
- expect(log_text).to include("LLM response #1")
132
- end
116
+ it "logs LLM call and response messages" do
117
+ response = MockResponse.new(content: "traced response")
118
+ inner_app = ->(_env) { response }
119
+ log_output = StringIO.new
120
+ middleware = Brute::Middleware::Tracing.new(inner_app, logger: Logger.new(log_output))
121
+ middleware.call(build_env(tool_results: nil))
122
+ log_output.string.should =~ /LLM call #1/
133
123
  end
134
124
  end
@@ -0,0 +1,18 @@
1
+ require_relative 'middleware/base'
2
+ require_relative 'middleware/llm_call'
3
+ require_relative 'middleware/retry'
4
+ require_relative 'middleware/doom_loop_detection'
5
+ require_relative 'middleware/token_tracking'
6
+ require_relative 'middleware/compaction_check'
7
+ require_relative 'middleware/session_persistence'
8
+ require_relative 'middleware/message_tracking'
9
+ require_relative 'middleware/tracing'
10
+ require_relative 'middleware/tool_error_tracking'
11
+ require_relative 'middleware/reasoning_normalizer'
12
+ require_relative "middleware/tool_use_guard"
13
+ require_relative "middleware/otel"
14
+
15
+ module Brute
16
+ module Middleware
17
+ end
18
+ end