brute 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4f0f32487b029541fdb462f5f4958a95e4727150911b51f6d8ab457b875187d8
4
- data.tar.gz: f162d75e227b4270e4a56dba42fe3cdddd23f9492adeff5cd1f345cbbb811961
3
+ metadata.gz: 07be8ba156b49a76de7dda83633348bc4f9bc87552b29846a5bbf11be83c13c2
4
+ data.tar.gz: a3643e4bae2399cf0b92cd52f9b8083f2cc0bfa78da64228e33cb3fd6504a54f
5
5
  SHA512:
6
- metadata.gz: 9aa172f042960dc5c9ec3250cf27781614077be7db114edc65d4ce72e178dfcab9ed9a9918d6583ed190d36cc54e865d8118bea9ac9814064a12a3f2b7ac8627
7
- data.tar.gz: c0cc5addf257b161cc06385417a24833bf9f99a73038040abc67c3caef75ab0a0b9e3d357364b0fa0d194ca3b259555b80f09bf805fc12d17be1e4386d8b061b
6
+ metadata.gz: 1c6f739d3d488d5c2dd728bd7144ccb27793a592dcda4628edb228c0c3c77d6f961d38877a249b6c6d9a009ee88bc8d2d7e0ab4651d6319073e18d3edb29c35c
7
+ data.tar.gz: 1da1558c7b9e04027003b5bdff82d0dd894a1643e1e6f15b8d57b4123d44367e8ad0abbd7780a7927b71710f94343ad32186dd4e68ea2e74f80c067f3dd70d6e
@@ -8,11 +8,22 @@ module Brute
8
8
  # response is still streaming. on_tool_result fires as each thread finishes.
9
9
  #
10
10
  class AgentStream < LLM::Stream
11
+ # Tool call metadata recorded during streaming, used by ToolUseGuard
12
+ # when ctx.functions is empty (nil-choice bug in llm.rb).
13
+ # Cleared by the guard after consumption to prevent stale data from
14
+ # causing duplicate synthetic assistant messages on subsequent calls.
15
+ attr_reader :pending_tool_calls
16
+
17
+ def clear_pending_tool_calls!
18
+ @pending_tool_calls.clear
19
+ end
20
+
11
21
  def initialize(on_content: nil, on_reasoning: nil, on_tool_call: nil, on_tool_result: nil)
12
22
  @on_content = on_content
13
23
  @on_reasoning = on_reasoning
14
24
  @on_tool_call = on_tool_call
15
25
  @on_tool_result = on_tool_result
26
+ @pending_tool_calls = []
16
27
  end
17
28
 
18
29
  def on_content(text)
@@ -24,6 +35,7 @@ module Brute
24
35
  end
25
36
 
26
37
  def on_tool_call(tool, error)
38
+ @pending_tool_calls << { id: tool.id, name: tool.name, arguments: tool.arguments }
27
39
  @on_tool_call&.call(tool.name, tool.arguments)
28
40
 
29
41
  if error
@@ -0,0 +1,269 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "fileutils"
5
+ require "securerandom"
6
+
7
module Brute
  # Stores session messages as individual JSON files in the OpenCode
  # {info, parts} format. Each session gets a directory; each message
  # is written to a file named after its ID inside that directory.
  #
  # Storage layout:
  #
  #   ~/.brute/sessions/{session-id}/
  #     session.meta.json   # part of the layout, not written by this class
  #     msg_0001.json
  #     msg_0002.json
  #     ...
  #
  # Message format matches OpenCode's MessageV2.WithParts:
  #
  #   { info:  { id:, sessionID:, role:, time:, ... },
  #     parts: [{ id:, type:, ... }, ...] }
  #
  # Every public method runs under a single internal Mutex, including ID
  # allocation, so concurrent appends cannot be handed the same ID.
  class MessageStore
    MESSAGE_ID_PATTERN = /\Amsg_(\d+)\z/
    PART_ID_PATTERN = /\Aprt_(\d+)\z/
    private_constant :MESSAGE_ID_PATTERN, :PART_ID_PATTERN

    attr_reader :session_id, :dir

    # session_id - identifier stamped into every message and part.
    # dir        - directory holding the message files; defaults to
    #              ~/.brute/sessions/{session_id}.
    #
    # Any msg_*.json files already present in the directory are loaded.
    def initialize(session_id:, dir: nil)
      @session_id = session_id
      @dir = dir || File.join(Dir.home, ".brute", "sessions", session_id)
      @messages = {} # id => { info:, parts: }
      @seq = 0
      @part_seq = 0
      @mutex = Mutex.new
      load_existing
    end

    # ── Append messages ──────────────────────────────────────────────

    # Record a user message with a single text part. Returns the message ID
    # (the given message_id, or the next generated "msg_NNNN").
    def append_user(text:, message_id: nil)
      @mutex.synchronize do
        id = message_id || next_message_id
        msg = {
          info: {
            id: id,
            sessionID: @session_id,
            role: "user",
            time: { created: now_ms },
          },
          parts: [
            { id: next_part_id, sessionID: @session_id, messageID: id,
              type: "text", text: text },
          ],
        }
        store_message(id, msg)
        id
      end
    end

    # Record the start of an assistant message. Returns the message ID.
    # Call complete_assistant later to fill in tokens/timing.
    def append_assistant(message_id: nil, parent_id: nil, model_id: nil, provider_id: nil)
      @mutex.synchronize do
        id = message_id || next_message_id
        msg = {
          info: {
            id: id,
            sessionID: @session_id,
            role: "assistant",
            parentID: parent_id,
            time: { created: now_ms },
            modelID: model_id,
            providerID: provider_id,
            tokens: zero_tokens,
            cost: 0.0,
          },
          parts: [],
        }
        store_message(id, msg)
        id
      end
    end

    # ── Parts ────────────────────────────────────────────────────────

    # Add a text part to an existing message. Returns the part ID, or nil
    # when the message is unknown.
    def add_text_part(message_id:, text:)
      with_message(message_id) do |msg|
        part = { id: next_part_id, sessionID: @session_id, messageID: message_id,
                 type: "text", text: text }
        (msg[:parts] ||= []) << part
        persist(message_id)
        part[:id]
      end
    end

    # Add a tool part in "running" state. Returns the part ID, or nil when
    # the message is unknown.
    def add_tool_part(message_id:, tool:, call_id:, input:)
      with_message(message_id) do |msg|
        part = {
          id: next_part_id, sessionID: @session_id, messageID: message_id,
          type: "tool", callID: call_id, tool: tool,
          state: {
            status: "running",
            input: input,
            time: { start: now_ms },
          },
        }
        (msg[:parts] ||= []) << part
        persist(message_id)
        part[:id]
      end
    end

    # Mark a tool part as completed with output. No-op (returns nil) when
    # the message or the tool part cannot be found.
    def complete_tool_part(message_id:, call_id:, output:)
      finish_tool_part(message_id, call_id, "completed") { |state| state[:output] = output }
    end

    # Mark a tool part as errored; the error is stored as a string. No-op
    # (returns nil) when the message or the tool part cannot be found.
    def error_tool_part(message_id:, call_id:, error:)
      finish_tool_part(message_id, call_id, "error") { |state| state[:error] = error.to_s }
    end

    # Add a step-finish part to a message. Token counts default to zero.
    def add_step_finish(message_id:, tokens: nil)
      with_message(message_id) do |msg|
        part = {
          id: next_part_id, sessionID: @session_id, messageID: message_id,
          type: "step-finish",
          reason: "stop",
          tokens: tokens || zero_tokens,
        }
        (msg[:parts] ||= []) << part
        persist(message_id)
      end
    end

    # ── Complete / update ────────────────────────────────────────────

    # Finalize an assistant message: stamps the completion time and, when
    # tokens is given, replaces the token counts. Accepts either
    # :input/:output/:reasoning or :total_input/:total_output/:total_reasoning.
    def complete_assistant(message_id:, tokens: nil)
      with_message(message_id) do |msg|
        msg[:info][:time][:completed] = now_ms
        if tokens
          msg[:info][:tokens] = {
            input: tokens[:input] || tokens[:total_input] || 0,
            output: tokens[:output] || tokens[:total_output] || 0,
            reasoning: tokens[:reasoning] || tokens[:total_reasoning] || 0,
            cache: tokens[:cache] || { read: 0, write: 0 },
          }
        end
        persist(message_id)
      end
    end

    # ── Queries ──────────────────────────────────────────────────────

    # All messages in insertion order. The returned hashes are the store's
    # own objects, not copies.
    def messages
      @mutex.synchronize { @messages.values }
    end

    # Single message by ID (nil when unknown). Returns the store's own hash.
    def message(id)
      @mutex.synchronize { @messages[id] }
    end

    # Number of stored messages.
    def count
      @mutex.synchronize { @messages.size }
    end

    private

    # ── ID generation (callers must hold @mutex) ─────────────────────

    def next_message_id
      @seq += 1
      format("msg_%04d", @seq)
    end

    def next_part_id
      @part_seq += 1
      format("prt_%04d", @part_seq)
    end

    def now_ms
      (Time.now.to_f * 1000).to_i
    end

    # Fresh zeroed token counters; a new hash each time because the result
    # is stored inside a message and may be replaced or mutated later.
    def zero_tokens
      { input: 0, output: 0, reasoning: 0, cache: { read: 0, write: 0 } }
    end

    # ── Locking helpers ──────────────────────────────────────────────

    # Runs the block with the message for message_id while holding the
    # lock. Returns nil without yielding when the message is unknown.
    def with_message(message_id)
      @mutex.synchronize do
        msg = @messages[message_id]
        msg && yield(msg)
      end
    end

    # Shared tail of complete_tool_part / error_tool_part: sets the status,
    # lets the caller's block record its payload, stamps the end time and
    # persists.
    def finish_tool_part(message_id, call_id, status)
      with_message(message_id) do |msg|
        part = (msg[:parts] || []).find { |p| p[:type] == "tool" && p[:callID] == call_id }
        next unless part

        state = part[:state]
        state[:status] = status
        yield state
        state[:time][:end] = now_ms
        persist(message_id)
      end
    end

    # ── Persistence (callers must hold @mutex) ───────────────────────

    def store_message(id, msg)
      @messages[id] = msg
      persist(id)
    end

    # Writes the message to "{id}.json". The JSON goes to a sibling ".tmp"
    # file first and is then renamed over the target, so an interrupted
    # write cannot leave a truncated message file behind. Returns the
    # number of bytes written, or nil when the message is unknown.
    def persist(id)
      FileUtils.mkdir_p(@dir)
      msg = @messages[id]
      return unless msg

      path = File.join(@dir, "#{id}.json")
      tmp_path = "#{path}.tmp"
      bytes = File.write(tmp_path, JSON.pretty_generate(msg))
      File.rename(tmp_path, path)
      bytes
    end

    # Load any existing message files from disk on init.
    #
    # Files are visited in numeric sequence order (not lexicographic, which
    # would put msg_10000 before msg_2000). Unreadable files are skipped
    # with a warning, but their sequence number is still reserved so a new
    # message never overwrites them.
    def load_existing
      return unless File.directory?(@dir)

      message_paths.each do |path|
        @seq = highest(@seq, File.basename(path, ".json"), MESSAGE_ID_PATTERN)

        data = read_message_file(path)
        next unless data

        id = data[:info][:id]
        next unless id

        @messages[id] = data

        # Track sequence numbers so new IDs don't collide
        @seq = highest(@seq, id, MESSAGE_ID_PATTERN)
        (data[:parts] || []).each do |part|
          next unless part.is_a?(Hash)

          @part_seq = highest(@part_seq, part[:id], PART_ID_PATTERN)
        end
      end
    end

    # msg_*.json paths ordered by numeric sequence; names without a numeric
    # sequence sort last, alphabetically.
    def message_paths
      Dir.glob(File.join(@dir, "msg_*.json")).sort_by do |path|
        seq = File.basename(path, ".json")[MESSAGE_ID_PATTERN, 1]
        [seq ? seq.to_i : Float::INFINITY, path]
      end
    end

    # Parses one message file. Returns the symbol-keyed hash, or nil when
    # the file is not valid JSON or lacks an :info object.
    def read_message_file(path)
      data = JSON.parse(File.read(path), symbolize_names: true)
      data if data.is_a?(Hash) && data[:info].is_a?(Hash)
    rescue JSON::ParserError => e
      warn "Brute::MessageStore: skipping unreadable message file #{path}: #{e.message}"
      nil
    end

    # Returns the larger of current and the number captured from text by
    # pattern; current is returned unchanged when text does not match.
    def highest(current, text, pattern)
      digits = text.is_a?(String) ? text[pattern, 1] : nil
      digits ? [current, digits.to_i].max : current
    end
  end
end
@@ -10,11 +10,12 @@ module Brute
10
10
  # rebuilds the context with the summary + recent messages.
11
11
  #
12
12
  class CompactionCheck < Base
13
- def initialize(app, compactor:, system_prompt:, tools:)
13
+ def initialize(app, compactor:, system_prompt:, tools:, stream: nil)
14
14
  super(app)
15
15
  @compactor = compactor
16
16
  @system_prompt = system_prompt
17
17
  @tools = tools
18
+ @stream = stream
18
19
  end
19
20
 
20
21
  def call(env)
@@ -43,7 +44,9 @@ module Brute
43
44
 
44
45
  def rebuild_context!(env, summary_text)
45
46
  provider = env[:provider]
46
- new_ctx = LLM::Context.new(provider, tools: @tools)
47
+ ctx_opts = { tools: @tools }
48
+ ctx_opts[:stream] = @stream if @stream
49
+ new_ctx = LLM::Context.new(provider, **ctx_opts)
47
50
  prompt = new_ctx.prompt do |p|
48
51
  p.system @system_prompt
49
52
  p.user "[Previous conversation summary]\n\n#{summary_text}"
@@ -0,0 +1,195 @@
1
+ # frozen_string_literal: true
2
+
3
module Brute
  module Middleware
    # Records every LLM exchange into a MessageStore in the OpenCode
    # {info, parts} format so sessions can be viewed later.
    #
    # Lifecycle per pipeline call:
    #
    # 1. PRE-CALL  — if env[:tool_results] is nil (first call of a turn),
    #                record the user message; otherwise mark the tool parts
    #                of the previous assistant message as completed/errored.
    # 2. POST-CALL — record the assistant message: text content as a "text"
    #                part, each tool call as a "tool" part in "running"
    #                state, then the completion time, token counts and a
    #                step-finish part.
    #
    # The middleware also stores itself in env[:message_tracking] so the
    # orchestrator can access the current assistant message ID for callbacks.
    #
    # NOTE(review): the user message is recorded on every call where
    # env[:tool_results] is nil. If an outer middleware re-invokes this one
    # for the same turn (e.g. a retry), the user message is recorded again —
    # confirm whether that is intended.
    class MessageTracking < Base
      # store                - the MessageStore being written to.
      # current_assistant_id - ID of the most recently recorded assistant
      #                        message (used by external callbacks).
      attr_reader :store, :current_assistant_id

      def initialize(app, store:)
        super(app)
        @store = store
        @current_user_id = nil
        @current_assistant_id = nil
      end

      # Runs the wrapped app and records both sides of the exchange.
      # Returns the app's response unchanged.
      def call(env)
        env[:message_tracking] = self

        # ── Pre-call: record user message or update tool results ──
        if env[:tool_results].nil?
          record_user_message(env)
        else
          complete_tool_parts(env)
        end

        # ── LLM call ──
        response = @app.call(env)

        # ── Post-call: record assistant message ──
        record_assistant_message(env, response)

        response
      end

      private

      # ── User message ───────────────────────────────────────────────

      def record_user_message(env)
        text = extract_user_text(env)
        return unless text

        @current_user_id = @store.append_user(text: text)
      end

      # Pulls the user's text out of env[:input], which may be a String, an
      # Array of message objects, or an object exposing #messages. For the
      # latter two, the last message whose role is "user" wins. Returns nil
      # when nothing usable is found.
      def extract_user_text(env)
        input = env[:input]
        return input if input.is_a?(String)

        candidates =
          if input.is_a?(Array)
            input
          elsif input.respond_to?(:messages)
            input.messages.to_a
          end
        return unless candidates

        user_msg = candidates.reverse_each.find { |m| m.respond_to?(:role) && m.role.to_s == "user" }
        user_msg.content.to_s if user_msg
      end

      # ── Assistant message ──────────────────────────────────────────

      def record_assistant_message(env, response)
        provider = env[:provider]
        provider_name = provider.nil? ? nil : provider.class.name&.split("::")&.last&.downcase
        model_name = provider.respond_to?(:default_model) ? provider.default_model.to_s : nil

        @current_assistant_id = @store.append_assistant(
          parent_id: @current_user_id,
          model_id: model_name,
          provider_id: provider_name,
        )

        # Text content
        text = safe_content(response)
        @store.add_text_part(message_id: @current_assistant_id, text: text) if text && !text.empty?

        # Tool calls
        record_tool_calls(env)

        # Completion time is stamped even when no token data is available;
        # complete_assistant keeps the existing token counts when tokens is nil.
        tokens = extract_tokens(env, response)
        @store.complete_assistant(message_id: @current_assistant_id, tokens: tokens)

        # Step finish
        @store.add_step_finish(message_id: @current_assistant_id, tokens: tokens)
      end

      # Adds one "running" tool part per pending function on the context.
      def record_tool_calls(env)
        functions = env[:context].functions
        return if functions.nil? || functions.empty?

        functions.each do |fn|
          @store.add_tool_part(
            message_id: @current_assistant_id,
            tool: fn.name,
            call_id: fn.id,
            input: fn.arguments,
          )
        end
      end

      # ── Tool results ───────────────────────────────────────────────

      # Tool results arrive as [name, value] pairs without a call ID, so each
      # result is matched to the first tool part of that name that is still
      # "running". Names are compared as strings so a Symbol/String mismatch
      # between the result and the stored part does not drop the update.
      def complete_tool_parts(env)
        return unless @current_assistant_id

        results = env[:tool_results]
        return unless results.is_a?(Array)

        msg = @store.message(@current_assistant_id)
        return unless msg

        results.each do |name, value|
          part = msg[:parts]&.find do |p|
            p[:type] == "tool" && p[:tool].to_s == name.to_s && p.dig(:state, :status) == "running"
          end
          next unless part

          if value.is_a?(Hash) && value[:error]
            @store.error_tool_part(
              message_id: @current_assistant_id,
              call_id: part[:callID],
              error: value[:error],
            )
          else
            @store.complete_tool_part(
              message_id: @current_assistant_id,
              call_id: part[:callID],
              output: value.to_s,
            )
          end
        end
      end

      # ── Helpers ────────────────────────────────────────────────────

      # Best-effort read of response.content. A NoMethodError raised while
      # reading it is treated as "no content" rather than failing the call.
      def safe_content(response)
        return nil unless response.respond_to?(:content)

        response.content
      rescue NoMethodError
        nil
      end

      # Token counts for this call: env[:metadata][:tokens][:last_call] when
      # present, otherwise the response's usage object. Returns nil when
      # neither source is available.
      def extract_tokens(env, response)
        meta_tokens = env.dig(:metadata, :tokens, :last_call)
        if meta_tokens
          {
            input: meta_tokens[:input] || 0,
            output: meta_tokens[:output] || 0,
            reasoning: 0,
            cache: { read: 0, write: 0 },
          }
        elsif response.respond_to?(:usage) && (usage = response.usage)
          {
            input: usage.input_tokens.to_i,
            output: usage.output_tokens.to_i,
            reasoning: usage.reasoning_tokens.to_i,
            cache: { read: 0, write: 0 },
          }
        end
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
module Brute
  module Middleware
    module OTel
      # Outermost OTel middleware. Creates a span per LLM pipeline call
      # and passes it through env[:span] for inner OTel middlewares to
      # decorate with events and attributes.
      #
      # When opentelemetry-sdk is not loaded, this is a pure pass-through.
      #
      # Pipeline position: outermost (wraps everything including retries).
      #
      #   use Brute::Middleware::OTel::Span
      #   use Brute::Middleware::OTel::ToolResults
      #   use Brute::Middleware::OTel::ToolCalls
      #   use Brute::Middleware::OTel::TokenUsage
      #   # ... existing middleware ...
      #   run Brute::Middleware::LLMCall.new
      #
      class Span < Base
        # Substring of the downcased provider class name => reported provider
        # type. Checked in order; the first match wins.
        PROVIDER_MARKERS = {
          "anthropic" => "anthropic",
          "openai" => "openai",
          "google" => "google",
          "gemini" => "google",
          "deepseek" => "deepseek",
          "ollama" => "ollama",
          "xai" => "xai",
        }.freeze
        private_constant :PROVIDER_MARKERS

        # Wraps the downstream call in a span named "llm.call <model>" (or
        # "llm.call" when the model is unknown). The span is exposed as
        # env[:span] for the duration of the call and removed afterwards.
        #
        # Exceptions are not rescued here: Tracer#in_span already records
        # the exception on the span, marks it as an error and re-raises.
        # Rescuing as well would record every failure twice.
        def call(env)
          return @app.call(env) unless defined?(::OpenTelemetry::SDK)

          model = context_model(env)
          span_name = model ? "llm.call #{model}" : "llm.call"

          tracer.in_span(span_name, attributes: span_attributes(env, model), kind: :internal) do |span|
            env[:span] = span
            response = @app.call(env)
            record_response_model(span, response, model)
            response
          ensure
            env.delete(:span)
          end
        end

        private

        def tracer
          @tracer ||= ::OpenTelemetry.tracer_provider.tracer("brute", Brute::VERSION)
        end

        # Model configured on the context, or nil when it cannot be read.
        # Telemetry must never make the LLM call itself fail.
        def context_model(env)
          env[:context].model
        rescue StandardError
          nil
        end

        # Number of messages in the context, or nil when it cannot be read.
        def context_message_count(env)
          env[:context].messages.to_a.size
        rescue StandardError
          nil
        end

        # Attributes set when the span starts. Optional values are omitted
        # rather than set to nil.
        def span_attributes(env, model)
          attributes = {
            "brute.provider" => provider_type(env[:provider]),
            "brute.streaming" => !!env[:streaming],
          }
          message_count = context_message_count(env)
          attributes["brute.context_messages"] = message_count if message_count
          attributes["brute.model"] = model.to_s if model
          session_id = env.dig(:metadata, :session_id)
          attributes["brute.session_id"] = session_id.to_s if session_id
          attributes
        end

        # Record the response model if it differs from the request model.
        # Compared as strings so a Symbol/String pair naming the same model
        # is not reported as a difference.
        def record_response_model(span, response, model)
          resp_model = begin
            response.model
          rescue StandardError
            nil
          end
          return unless resp_model && resp_model.to_s != model.to_s

          span.set_attribute("brute.response_model", resp_model.to_s)
        end

        # Maps the provider's class name onto a short provider type, or
        # "unknown" when no known marker appears in the name.
        def provider_type(provider)
          name = provider.class.name.to_s.downcase
          marker = PROVIDER_MARKERS.keys.find { |candidate| name.include?(candidate) }
          marker ? PROVIDER_MARKERS[marker] : "unknown"
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
module Brute
  module Middleware
    module OTel
      # Copies token usage from the LLM response onto the current span.
      #
      # Runs POST-call: once the downstream app has returned, the token
      # counts on the response's usage object become span attributes.
      # Nothing is recorded when there is no span in env[:span] or the
      # response carries no usage.
      #
      class TokenUsage < Base
        # Span attribute name => reader on the usage object. These three are
        # always recorded; reasoning tokens only when greater than zero.
        COUNTERS = {
          "gen_ai.usage.input_tokens" => :input_tokens,
          "gen_ai.usage.output_tokens" => :output_tokens,
          "gen_ai.usage.total_tokens" => :total_tokens,
        }.freeze
        private_constant :COUNTERS

        # Returns the downstream response unchanged.
        def call(env)
          @app.call(env).tap { |response| record_usage(env[:span], response) }
        end

        private

        def record_usage(span, response)
          return unless span && response.respond_to?(:usage)

          usage = response.usage
          return unless usage

          COUNTERS.each do |attribute, reader|
            span.set_attribute(attribute, usage.public_send(reader).to_i)
          end

          reasoning = usage.reasoning_tokens.to_i
          span.set_attribute("gen_ai.usage.reasoning_tokens", reasoning) if reasoning > 0
        end
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module Brute
  module Middleware
    module OTel
      # Adds one "tool_call" span event per tool call the LLM requested.
      #
      # Runs POST-call: after the downstream app returns, the functions
      # pending on env[:context] are read, their count is set as a span
      # attribute, and each one becomes an event carrying the tool name,
      # call ID and (when present) JSON-encoded arguments.
      #
      class ToolCalls < Base
        # Returns the downstream response unchanged.
        def call(env)
          response = @app.call(env)

          span = env[:span]
          return response unless span

          functions = env[:context].functions
          return response unless functions && !functions.empty?

          span.set_attribute("brute.tool_calls.count", functions.size)
          functions.each do |fn|
            span.add_event("tool_call", attributes: event_attributes(fn))
          end

          response
        end

        private

        # Event attributes for a single requested tool call.
        def event_attributes(fn)
          details = {
            "tool.name" => fn.name.to_s,
            "tool.id" => fn.id.to_s,
          }
          arguments = fn.arguments
          details["tool.arguments"] = arguments.to_json if arguments
          details
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module Brute
  module Middleware
    module OTel
      # Adds one "tool_result" span event per tool result sent to the LLM.
      #
      # Runs PRE-call: when both env[:span] and env[:tool_results] are
      # present, the result count is set as a span attribute and every
      # [name, value] pair becomes an event with the tool name and an
      # ok/error status. A value counts as an error when it is a Hash with
      # a truthy :error entry.
      #
      class ToolResults < Base
        # Annotates the span, then hands off to the downstream app and
        # returns its response.
        def call(env)
          annotate(env[:span], env[:tool_results])
          @app.call(env)
        end

        private

        def annotate(span, results)
          return unless span && results

          span.set_attribute("brute.tool_results.count", results.size)
          results.each do |name, value|
            span.add_event("tool_result", attributes: result_attributes(name, value))
          end
        end

        # Event attributes for a single tool result.
        def result_attributes(name, value)
          failure = value.is_a?(Hash) ? value[:error] : nil
          details = { "tool.name" => name.to_s }

          if failure
            details["tool.status"] = "error"
            details["tool.error"] = failure.to_s
          else
            details["tool.status"] = "ok"
          end

          details
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Brute
4
+ module Middleware
5
+ # OpenTelemetry instrumentation for the LLM pipeline.
6
+ #
7
+ # Each middleware is independent and communicates through env[:span].
8
+ # OTel::Span must be outermost — it creates the span. The rest
9
+ # decorate it with events and attributes from their position in the
10
+ # pipeline.
11
+ #
12
+ # All middlewares are no-ops when opentelemetry-sdk is not loaded.
13
+ #
14
+ # Usage in pipeline:
15
+ #
16
+ # use Brute::Middleware::OTel::Span
17
+ # use Brute::Middleware::OTel::ToolResults
18
+ # use Brute::Middleware::OTel::ToolCalls
19
+ # use Brute::Middleware::OTel::TokenUsage
20
+ #
21
+ module OTel
22
+ end
23
+ end
24
+ end
25
+
26
+ require_relative "otel/span"
27
+ require_relative "otel/tool_results"
28
+ require_relative "otel/tool_calls"
29
+ require_relative "otel/token_usage"
@@ -11,8 +11,16 @@ module Brute
11
11
  # blocks is lost. This causes "unexpected tool_use_id" on the next call
12
12
  # because tool_result references a tool_use that's missing from the buffer.
13
13
  #
14
- # This middleware runs post-call and injects a synthetic assistant message
15
- # when tool calls exist but no assistant message was recorded.
14
+ # This middleware runs post-call and ensures every pending tool_use ID
15
+ # is covered by an assistant message in the buffer. It handles three
16
+ # cases:
17
+ #
18
+ # 1. ctx.functions is non-empty and the assistant message exists → no-op
19
+ # 2. ctx.functions is non-empty but the assistant message is missing
20
+ # (or has different IDs) → inject synthetic message
21
+ # 3. ctx.functions is empty (nil-choice bug) but the stream recorded
22
+ # tool calls → inject synthetic message using stream metadata
23
+ #
16
24
  class ToolUseGuard
17
25
  def initialize(app)
18
26
  @app = app
@@ -22,32 +30,67 @@ module Brute
22
30
  response = @app.call(env)
23
31
 
24
32
  ctx = env[:context]
25
- functions = ctx.functions
26
33
 
27
- # If there are pending tool calls, ensure the buffer has an assistant
28
- # message with tool_use blocks.
34
+ # Collect pending tool data from ctx.functions (primary) or the
35
+ # stream's recorded metadata (fallback for nil-choice bug).
36
+ tool_data = collect_tool_data(ctx, env)
37
+ return response if tool_data.empty?
38
+
39
+ # Find all tool_use IDs already covered by assistant messages.
40
+ covered_ids = covered_tool_ids(ctx)
41
+
42
+ # Inject a synthetic assistant message for any uncovered tool calls.
43
+ uncovered = tool_data.reject { |td| covered_ids.include?(td[:id]) }
44
+ inject_synthetic!(ctx, uncovered) unless uncovered.empty?
45
+
46
+ response
47
+ end
48
+
49
+ private
50
+
51
+ def collect_tool_data(ctx, env)
52
+ functions = ctx.functions
29
53
  if functions && !functions.empty?
30
- messages = ctx.messages.to_a
31
- last_assistant = messages.reverse.find { |m| m.role.to_s == "assistant" }
32
-
33
- unless last_assistant&.tool_call?
34
- # Build a synthetic assistant message with the tool_use data
35
- tool_calls = functions.map do |fn|
36
- LLM::Object.from(id: fn.id, name: fn.name, arguments: fn.arguments)
37
- end
38
- original_tool_calls = functions.map do |fn|
39
- { "type" => "tool_use", "id" => fn.id, "name" => fn.name, "input" => fn.arguments || {} }
40
- end
41
-
42
- synthetic = LLM::Message.new(:assistant, "", {
43
- tool_calls: tool_calls,
44
- original_tool_calls: original_tool_calls,
45
- })
46
- ctx.messages.concat([synthetic])
54
+ functions.map { |fn| { id: fn.id, name: fn.name, arguments: fn.arguments } }
55
+ elsif env[:streaming]
56
+ stream = resolve_stream(ctx)
57
+ if stream
58
+ data = stream.pending_tool_calls.dup
59
+ stream.clear_pending_tool_calls!
60
+ data
61
+ else
62
+ []
47
63
  end
64
+ else
65
+ []
48
66
  end
67
+ end
49
68
 
50
- response
69
+ def resolve_stream(ctx)
70
+ stream = ctx.instance_variable_get(:@params)&.dig(:stream)
71
+ stream if stream.respond_to?(:pending_tool_calls)
72
+ end
73
+
74
+ def covered_tool_ids(ctx)
75
+ ctx.messages.to_a
76
+ .select { |m| m.role.to_s == "assistant" && m.tool_call? }
77
+ .flat_map { |m| (m.extra.original_tool_calls || []).map { |tc| tc["id"] } }
78
+ .to_set
79
+ end
80
+
81
+ def inject_synthetic!(ctx, uncovered)
82
+ tool_calls = uncovered.map do |td|
83
+ LLM::Object.from(id: td[:id], name: td[:name], arguments: td[:arguments])
84
+ end
85
+ original_tool_calls = uncovered.map do |td|
86
+ { "type" => "tool_use", "id" => td[:id], "name" => td[:name], "input" => td[:arguments] || {} }
87
+ end
88
+
89
+ synthetic = LLM::Message.new(:assistant, "", {
90
+ tool_calls: tool_calls,
91
+ original_tool_calls: original_tool_calls,
92
+ })
93
+ ctx.messages.concat([synthetic])
51
94
  end
52
95
  end
53
96
  end
@@ -20,7 +20,7 @@ module Brute
20
20
  class Orchestrator
21
21
  MAX_REQUESTS_PER_TURN = 100
22
22
 
23
- attr_reader :context, :session, :pipeline, :env, :barrier
23
+ attr_reader :context, :session, :pipeline, :env, :barrier, :message_store
24
24
 
25
25
  def initialize(
26
26
  provider:,
@@ -40,6 +40,7 @@ module Brute
40
40
  @cwd = cwd
41
41
  @session = session || Session.new
42
42
  @logger = logger || Logger.new($stderr, level: Logger::INFO)
43
+ @message_store = @session.message_store
43
44
 
44
45
  # Build system prompt
45
46
  custom_rules = load_custom_rules
@@ -65,6 +66,7 @@ module Brute
65
66
  session: @session,
66
67
  logger: @logger,
67
68
  reasoning: reasoning,
69
+ message_store: @message_store,
68
70
  )
69
71
 
70
72
  # The shared env hash — passed to every pipeline.call()
@@ -115,7 +117,7 @@ module Brute
115
117
 
116
118
  # --- Agent loop ---
117
119
  loop do
118
- break if @context.functions.empty?
120
+ break if @context.functions.empty? && (!@stream || @stream.queue.empty?)
119
121
 
120
122
  # Collect tool results.
121
123
  # Streaming: tools already spawned threads during the LLM response — just join them.
@@ -135,7 +137,7 @@ module Brute
135
137
  @request_count += 1
136
138
 
137
139
  # Check limits
138
- break if @context.functions.empty?
140
+ break if @context.functions.empty? && (!@stream || @stream.queue.empty?)
139
141
  break if @request_count >= MAX_REQUESTS_PER_TURN
140
142
  break if @env[:metadata][:tool_error_limit_reached]
141
143
  end
@@ -149,28 +151,42 @@ module Brute
149
151
  # Pipeline construction
150
152
  # ------------------------------------------------------------------
151
153
 
152
- def build_pipeline(compactor:, session:, logger:, reasoning:)
154
+ def build_pipeline(compactor:, session:, logger:, reasoning:, message_store:)
153
155
  sys_prompt = @system_prompt
154
156
  tools = @tool_classes
157
+ stream = @stream
155
158
 
156
159
  Pipeline.new do
157
- # Outermost: timing and logging (sees total elapsed including retries)
160
+ # OTel span lifecycle (outermost creates env[:span])
161
+ use Middleware::OTel::Span
162
+
163
+ # Timing and logging
158
164
  use Middleware::Tracing, logger: logger
159
165
 
166
+ # OTel: record tool results being sent back (pre-call)
167
+ use Middleware::OTel::ToolResults
168
+
160
169
  # Retry transient errors (wraps everything below)
161
170
  use Middleware::Retry
162
171
 
163
172
  # Save after each successful LLM call
164
173
  use Middleware::SessionPersistence, session: session
165
174
 
175
+ # Record structured messages in OpenCode {info, parts} format
176
+ use Middleware::MessageTracking, store: message_store
177
+
166
178
  # Track cumulative token usage
167
179
  use Middleware::TokenTracking
168
180
 
181
+ # OTel: record token usage from response (post-call)
182
+ use Middleware::OTel::TokenUsage
183
+
169
184
  # Check context size and compact if needed
170
185
  use Middleware::CompactionCheck,
171
186
  compactor: compactor,
172
187
  system_prompt: sys_prompt,
173
- tools: tools
188
+ tools: tools,
189
+ stream: stream
174
190
 
175
191
  # Track per-tool errors
176
192
  use Middleware::ToolErrorTracking
@@ -184,6 +200,9 @@ module Brute
184
200
  # Guard against tool-only responses dropping the assistant message
185
201
  use Middleware::ToolUseGuard
186
202
 
203
+ # OTel: record tool calls the LLM requested (post-call, after ToolUseGuard)
204
+ use Middleware::OTel::ToolCalls
205
+
187
206
  # Innermost: the actual LLM call
188
207
  run Middleware::LLMCall.new
189
208
  end
data/lib/brute/session.rb CHANGED
@@ -8,79 +8,154 @@ module Brute
8
8
  # Manages session persistence. Each session is a conversation that can be
9
9
  # saved to disk and resumed later.
10
10
  #
11
- # Sessions are stored as JSON files in a configurable directory
12
- # (defaults to ~/.brute/sessions/).
11
+ # New directory-based layout (per-session directory):
12
+ #
13
+ # ~/.brute/sessions/{session-id}/
14
+ # session.meta.json # session metadata
15
+ # context.json # llm.rb context blob (for resumption)
16
+ # msg_0001.json # structured messages (OpenCode format)
17
+ # msg_0002.json
18
+ # ...
19
+ #
20
+ # Also supports the legacy flat layout for reading:
21
+ #
22
+ # ~/.brute/sessions/{session-id}.json
23
+ # ~/.brute/sessions/{session-id}.meta.json
24
+ #
13
25
  class Session
14
26
  attr_reader :id, :title, :path
15
27
 
16
28
  def initialize(id: nil, dir: nil)
17
29
  @id = id || SecureRandom.uuid
18
- @dir = dir || File.join(Dir.home, ".brute", "sessions")
19
- @path = File.join(@dir, "#{@id}.json")
30
+ @base_dir = dir || File.join(Dir.home, ".brute", "sessions")
31
+ @session_dir = File.join(@base_dir, @id)
32
+ @path = File.join(@session_dir, "context.json")
20
33
  @title = nil
21
34
  @metadata = {}
22
- FileUtils.mkdir_p(@dir)
35
+ FileUtils.mkdir_p(@session_dir)
36
+
37
+ # Paths of the legacy flat-file layout (read as a fallback by restore, removed by delete)
38
+ @legacy_path = File.join(@base_dir, "#{@id}.json")
39
+ @legacy_meta = File.join(@base_dir, "#{@id}.meta.json")
40
+ end
41
+
42
+ # Returns a MessageStore for this session's structured messages.
43
+ def message_store
44
+ @message_store ||= MessageStore.new(session_id: @id, dir: @session_dir)
23
45
  end
24
46
 
25
- # Save a context to this session file.
47
+ # Save a context to this session.
26
48
  def save(context, title: nil, metadata: {})
27
49
  @title = title if title
28
50
  @metadata.merge!(metadata)
29
51
 
30
- data = {
31
- id: @id,
32
- title: @title,
33
- saved_at: Time.now.iso8601,
34
- metadata: @metadata,
35
- }
36
-
37
- # Use llm.rb's built-in serialization
52
+ # Use llm.rb's built-in serialization for context (used for resumption)
38
53
  context.save(path: @path)
39
54
 
40
55
  # Write metadata sidecar
41
- meta_path = @path.sub(/\.json$/, ".meta.json")
42
- File.write(meta_path, JSON.pretty_generate(data))
56
+ save_meta
43
57
  end
44
58
 
45
- # Restore a context from this session file.
59
+ # Restore a context from this session.
46
60
  # Returns true if restored successfully, false if no session file found.
47
61
  def restore(context)
48
- return false unless File.exist?(@path)
62
+ # Try new layout first, then legacy
63
+ ctx_path = if File.exist?(@path)
64
+ @path
65
+ elsif File.exist?(@legacy_path)
66
+ @legacy_path
67
+ end
49
68
 
50
- context.restore(path: @path)
69
+ return false unless ctx_path
51
70
 
52
- # Load metadata sidecar if present
53
- meta_path = @path.sub(/\.json$/, ".meta.json")
54
- if File.exist?(meta_path)
55
- data = JSON.parse(File.read(meta_path), symbolize_names: true)
56
- @title = data[:title]
57
- @metadata = data[:metadata] || {}
58
- end
71
+ context.restore(path: ctx_path)
72
+
73
+ # Load metadata
74
+ load_meta
59
75
 
60
76
  true
61
77
  end
62
78
 
63
79
  # List all saved sessions, newest first.
80
+ # Scans both new directory-based layout and legacy flat files.
64
81
  def self.list(dir: nil)
65
82
  dir ||= File.join(Dir.home, ".brute", "sessions")
66
83
  return [] unless File.directory?(dir)
67
84
 
68
- Dir.glob(File.join(dir, "*.meta.json")).map { |meta_path|
85
+ sessions = {}
86
+
87
+ # New layout: {id}/session.meta.json
88
+ Dir.glob(File.join(dir, "*", "session.meta.json")).each do |meta_path|
89
+ data = JSON.parse(File.read(meta_path), symbolize_names: true)
90
+ id = data[:id]
91
+ next unless id
92
+ sessions[id] = {
93
+ id: id,
94
+ title: data[:title],
95
+ saved_at: data[:saved_at],
96
+ path: File.join(File.dirname(meta_path), "context.json"),
97
+ }
98
+ end
99
+
100
+ # Legacy layout: {id}.meta.json (only if not already found)
101
+ Dir.glob(File.join(dir, "*.meta.json")).each do |meta_path|
102
+ # Skip new-layout metadata files (named session.meta.json)
103
+ next if meta_path.include?("/session.meta.json")
69
104
  data = JSON.parse(File.read(meta_path), symbolize_names: true)
70
- {
71
- id: data[:id],
105
+ id = data[:id]
106
+ next unless id
107
+ next if sessions.key?(id) # new layout takes precedence
108
+ sessions[id] = {
109
+ id: id,
72
110
  title: data[:title],
73
111
  saved_at: data[:saved_at],
74
112
  path: meta_path.sub(/\.meta\.json$/, ".json"),
75
113
  }
76
- }.sort_by { |s| s[:saved_at] || "" }.reverse
114
+ end
115
+
116
+ sessions.values.sort_by { |s| s[:saved_at] || "" }.reverse
77
117
  end
78
118
 
79
- # Delete a session from disk.
119
+ # Delete a session from disk (both new and legacy layouts).
80
120
  def delete
81
- File.delete(@path) if File.exist?(@path)
82
- meta_path = @path.sub(/\.json$/, ".meta.json")
83
- File.delete(meta_path) if File.exist?(meta_path)
121
+ # New layout: remove the whole directory
122
+ FileUtils.rm_rf(@session_dir) if File.directory?(@session_dir)
123
+
124
+ # Legacy layout: remove flat files
125
+ File.delete(@legacy_path) if File.exist?(@legacy_path)
126
+ File.delete(@legacy_meta) if File.exist?(@legacy_meta)
127
+ end
128
+
129
+ private
130
+
131
+ def meta_path
132
+ File.join(@session_dir, "session.meta.json")
133
+ end
134
+
135
+ def save_meta
136
+ data = {
137
+ id: @id,
138
+ title: @title,
139
+ saved_at: Time.now.iso8601,
140
+ metadata: @metadata,
141
+ }
142
+ FileUtils.mkdir_p(@session_dir)
143
+ File.write(meta_path, JSON.pretty_generate(data))
144
+ end
145
+
146
+ def load_meta
147
+ # Try new layout first
148
+ path = if File.exist?(meta_path)
149
+ meta_path
150
+ elsif File.exist?(@legacy_meta)
151
+ @legacy_meta
152
+ end
153
+
154
+ return unless path
155
+
156
+ data = JSON.parse(File.read(path), symbolize_names: true)
157
+ @title = data[:title]
158
+ @metadata = data[:metadata] || {}
84
159
  end
85
160
  end
86
161
  end
data/lib/brute/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Brute
4
- VERSION = "0.1.7"
4
+ VERSION = "0.1.8"
5
5
  end
data/lib/brute.rb CHANGED
@@ -28,6 +28,7 @@ require_relative 'brute/doom_loop'
28
28
  require_relative 'brute/hooks'
29
29
  require_relative 'brute/compactor'
30
30
  require_relative 'brute/system_prompt'
31
+ require_relative 'brute/message_store'
31
32
  require_relative 'brute/session'
32
33
  require_relative 'brute/pipeline'
33
34
  require_relative 'brute/agent_stream'
@@ -44,10 +45,12 @@ require_relative 'brute/middleware/doom_loop_detection'
44
45
  require_relative 'brute/middleware/token_tracking'
45
46
  require_relative 'brute/middleware/compaction_check'
46
47
  require_relative 'brute/middleware/session_persistence'
48
+ require_relative 'brute/middleware/message_tracking'
47
49
  require_relative 'brute/middleware/tracing'
48
50
  require_relative 'brute/middleware/tool_error_tracking'
49
51
  require_relative 'brute/middleware/reasoning_normalizer'
50
52
  require_relative "brute/middleware/tool_use_guard"
53
+ require_relative "brute/middleware/otel"
51
54
 
52
55
  # Tools
53
56
  require_relative 'brute/tools/fs_read'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: brute
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brute Contributors
@@ -92,10 +92,17 @@ files:
92
92
  - lib/brute/doom_loop.rb
93
93
  - lib/brute/file_mutation_queue.rb
94
94
  - lib/brute/hooks.rb
95
+ - lib/brute/message_store.rb
95
96
  - lib/brute/middleware/base.rb
96
97
  - lib/brute/middleware/compaction_check.rb
97
98
  - lib/brute/middleware/doom_loop_detection.rb
98
99
  - lib/brute/middleware/llm_call.rb
100
+ - lib/brute/middleware/message_tracking.rb
101
+ - lib/brute/middleware/otel.rb
102
+ - lib/brute/middleware/otel/span.rb
103
+ - lib/brute/middleware/otel/token_usage.rb
104
+ - lib/brute/middleware/otel/tool_calls.rb
105
+ - lib/brute/middleware/otel/tool_results.rb
99
106
  - lib/brute/middleware/reasoning_normalizer.rb
100
107
  - lib/brute/middleware/retry.rb
101
108
  - lib/brute/middleware/session_persistence.rb