brute 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60):
  1. checksums.yaml +4 -4
  2. data/lib/brute/agent_stream.rb +16 -1
  3. data/lib/brute/message_store.rb +269 -0
  4. data/lib/brute/middleware/compaction_check.rb +5 -2
  5. data/lib/brute/middleware/message_tracking.rb +209 -0
  6. data/lib/brute/middleware/otel/span.rb +75 -0
  7. data/lib/brute/middleware/otel/token_usage.rb +30 -0
  8. data/lib/brute/middleware/otel/tool_calls.rb +39 -0
  9. data/lib/brute/middleware/otel/tool_results.rb +37 -0
  10. data/lib/brute/middleware/otel.rb +29 -0
  11. data/lib/brute/middleware/tool_use_guard.rb +66 -23
  12. data/lib/brute/orchestrator.rb +46 -13
  13. data/lib/brute/prompts/autonomy.rb +21 -0
  14. data/lib/brute/prompts/base.rb +23 -0
  15. data/lib/brute/prompts/build_switch.rb +19 -0
  16. data/lib/brute/prompts/code_references.rb +21 -0
  17. data/lib/brute/prompts/code_style.rb +16 -0
  18. data/lib/brute/prompts/conventions.rb +20 -0
  19. data/lib/brute/prompts/doing_tasks.rb +11 -0
  20. data/lib/brute/prompts/editing_approach.rb +20 -0
  21. data/lib/brute/prompts/editing_constraints.rb +24 -0
  22. data/lib/brute/prompts/environment.rb +25 -0
  23. data/lib/brute/prompts/frontend_tasks.rb +21 -0
  24. data/lib/brute/prompts/git_safety.rb +19 -0
  25. data/lib/brute/prompts/identity.rb +11 -0
  26. data/lib/brute/prompts/instructions.rb +18 -0
  27. data/lib/brute/prompts/max_steps.rb +30 -0
  28. data/lib/brute/prompts/objectivity.rb +16 -0
  29. data/lib/brute/prompts/plan_reminder.rb +40 -0
  30. data/lib/brute/prompts/proactiveness.rb +19 -0
  31. data/lib/brute/prompts/security_and_safety.rb +17 -0
  32. data/lib/brute/prompts/skills.rb +22 -0
  33. data/lib/brute/prompts/task_management.rb +59 -0
  34. data/lib/brute/prompts/text/agents/compaction.txt +15 -0
  35. data/lib/brute/prompts/text/agents/explore.txt +17 -0
  36. data/lib/brute/prompts/text/agents/summary.txt +11 -0
  37. data/lib/brute/prompts/text/agents/title.txt +40 -0
  38. data/lib/brute/prompts/text/doing_tasks/anthropic.txt +11 -0
  39. data/lib/brute/prompts/text/doing_tasks/default.txt +6 -0
  40. data/lib/brute/prompts/text/doing_tasks/google.txt +9 -0
  41. data/lib/brute/prompts/text/identity/anthropic.txt +5 -0
  42. data/lib/brute/prompts/text/identity/default.txt +3 -0
  43. data/lib/brute/prompts/text/identity/google.txt +1 -0
  44. data/lib/brute/prompts/text/identity/openai.txt +3 -0
  45. data/lib/brute/prompts/text/tone_and_style/anthropic.txt +5 -0
  46. data/lib/brute/prompts/text/tone_and_style/default.txt +9 -0
  47. data/lib/brute/prompts/text/tone_and_style/google.txt +6 -0
  48. data/lib/brute/prompts/text/tone_and_style/openai.txt +17 -0
  49. data/lib/brute/prompts/text/tool_usage/anthropic.txt +16 -0
  50. data/lib/brute/prompts/text/tool_usage/default.txt +4 -0
  51. data/lib/brute/prompts/text/tool_usage/google.txt +4 -0
  52. data/lib/brute/prompts/tone_and_style.rb +11 -0
  53. data/lib/brute/prompts/tool_usage.rb +11 -0
  54. data/lib/brute/session.rb +109 -34
  55. data/lib/brute/skill.rb +118 -0
  56. data/lib/brute/system_prompt.rb +119 -64
  57. data/lib/brute/tools/question.rb +59 -0
  58. data/lib/brute/version.rb +1 -1
  59. data/lib/brute.rb +62 -2
  60. metadata +52 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4f0f32487b029541fdb462f5f4958a95e4727150911b51f6d8ab457b875187d8
4
- data.tar.gz: f162d75e227b4270e4a56dba42fe3cdddd23f9492adeff5cd1f345cbbb811961
3
+ metadata.gz: 5a36d054875f1465a0e9bfc380187c98ff08f837f8477f062f784652246d4256
4
+ data.tar.gz: a7c4df2710346a213b3ded2ee9be7d84b0bbe50d04c9e1ac9eaa679b9a35a7b2
5
5
  SHA512:
6
- metadata.gz: 9aa172f042960dc5c9ec3250cf27781614077be7db114edc65d4ce72e178dfcab9ed9a9918d6583ed190d36cc54e865d8118bea9ac9814064a12a3f2b7ac8627
7
- data.tar.gz: c0cc5addf257b161cc06385417a24833bf9f99a73038040abc67c3caef75ab0a0b9e3d357364b0fa0d194ca3b259555b80f09bf805fc12d17be1e4386d8b061b
6
+ metadata.gz: 7893f212130fc7dd94d80e3bf6b926d47ab86e37fc629866bbbec674121d4e8083791a2b66b4437af98d2cb694c0dc89655519844f5797fe1f17e8652d4ddd01
7
+ data.tar.gz: 1c48113815f9ad3f2d068252e851167b369e22ab8c1d5bd1d314387acab5f1e4169a49cde91f7c5bbb4847ade234779712f3f7b5d3fcc43b6c76b80478017a26
@@ -8,11 +8,23 @@ module Brute
8
8
  # response is still streaming. on_tool_result fires as each thread finishes.
9
9
  #
10
10
  class AgentStream < LLM::Stream
11
- def initialize(on_content: nil, on_reasoning: nil, on_tool_call: nil, on_tool_result: nil)
11
+ # Tool call metadata recorded during streaming, used by ToolUseGuard
12
+ # when ctx.functions is empty (nil-choice bug in llm.rb).
13
+ # Cleared by the guard after consumption to prevent stale data from
14
+ # causing duplicate synthetic assistant messages on subsequent calls.
15
+ attr_reader :pending_tool_calls
16
+
17
+ def clear_pending_tool_calls!
18
+ @pending_tool_calls.clear
19
+ end
20
+
21
+ def initialize(on_content: nil, on_reasoning: nil, on_tool_call: nil, on_tool_result: nil, on_question: nil)
12
22
  @on_content = on_content
13
23
  @on_reasoning = on_reasoning
14
24
  @on_tool_call = on_tool_call
15
25
  @on_tool_result = on_tool_result
26
+ @on_question = on_question
27
+ @pending_tool_calls = []
16
28
  end
17
29
 
18
30
  def on_content(text)
@@ -24,6 +36,7 @@ module Brute
24
36
  end
25
37
 
26
38
  def on_tool_call(tool, error)
39
+ @pending_tool_calls << { id: tool.id, name: tool.name, arguments: tool.arguments }
27
40
  @on_tool_call&.call(tool.name, tool.arguments)
28
41
 
29
42
  if error
@@ -38,8 +51,10 @@ module Brute
38
51
 
39
52
  def spawn_with_callback(tool)
40
53
  on_result = @on_tool_result
54
+ on_question = @on_question
41
55
  name = tool.name
42
56
  Thread.new do
57
+ Thread.current[:on_question] = on_question
43
58
  result = tool.call
44
59
  on_result&.call(name, result.respond_to?(:value) ? result.value : result)
45
60
  result
@@ -0,0 +1,269 @@
1
# frozen_string_literal: true

require "json"
require "fileutils"
require "securerandom"

module Brute
  # Stores session messages as individual JSON files in the OpenCode
  # {info, parts} format. Each session gets a directory; each message
  # is a numbered JSON file inside it.
  #
  # Storage layout:
  #
  #   ~/.brute/sessions/{session-id}/
  #     session.meta.json
  #     msg_0001.json
  #     msg_0002.json
  #     ...
  #
  # Message format matches OpenCode's MessageV2.WithParts:
  #
  #   { info: { id:, sessionID:, role:, time:, ... },
  #     parts: [{ id:, type:, ... }, ...] }
  #
  # Thread-safety: every read, write AND ID allocation happens under a
  # single Mutex. (Previously message/part IDs were allocated before the
  # lock was taken, so concurrent appends from tool threads could race on
  # @seq/@part_seq and hand out duplicate IDs.)
  class MessageStore
    attr_reader :session_id, :dir

    # @param session_id [String] unique session identifier
    # @param dir [String, nil] storage directory; defaults to
    #   ~/.brute/sessions/{session_id}
    def initialize(session_id:, dir: nil)
      @session_id = session_id
      @dir = dir || File.join(Dir.home, ".brute", "sessions", session_id)
      @messages = {} # id => { info:, parts: }
      @seq = 0
      @part_seq = 0
      @mutex = Mutex.new
      load_existing
    end

    # ── Append messages ──────────────────────────────────────────────

    # Record a user message. Returns the message ID.
    def append_user(text:, message_id: nil)
      @mutex.synchronize do
        id = message_id || next_message_id
        msg = {
          info: {
            id: id,
            sessionID: @session_id,
            role: "user",
            time: { created: now_ms },
          },
          parts: [
            { id: next_part_id, sessionID: @session_id, messageID: id,
              type: "text", text: text },
          ],
        }
        store!(id, msg)
        id
      end
    end

    # Record the start of an assistant message. Returns the message ID.
    # Call complete_assistant later to fill in tokens/timing.
    def append_assistant(message_id: nil, parent_id: nil, model_id: nil, provider_id: nil)
      @mutex.synchronize do
        id = message_id || next_message_id
        msg = {
          info: {
            id: id,
            sessionID: @session_id,
            role: "assistant",
            parentID: parent_id,
            time: { created: now_ms },
            modelID: model_id,
            providerID: provider_id,
            tokens: { input: 0, output: 0, reasoning: 0, cache: { read: 0, write: 0 } },
            cost: 0.0,
          },
          parts: [],
        }
        store!(id, msg)
        id
      end
    end

    # ── Parts ────────────────────────────────────────────────────────

    # Add a text part to an existing message. Returns the part ID,
    # or nil when the message is unknown.
    def add_text_part(message_id:, text:)
      @mutex.synchronize do
        msg = @messages[message_id]
        return unless msg

        part = { id: next_part_id, sessionID: @session_id, messageID: message_id,
                 type: "text", text: text }
        msg[:parts] << part
        persist(message_id)
        part[:id]
      end
    end

    # Add a tool part in "running" state. Returns the part ID.
    def add_tool_part(message_id:, tool:, call_id:, input:)
      @mutex.synchronize do
        msg = @messages[message_id]
        return unless msg

        part = {
          id: next_part_id, sessionID: @session_id, messageID: message_id,
          type: "tool", callID: call_id, tool: tool,
          state: {
            status: "running",
            input: input,
            time: { start: now_ms },
          },
        }
        msg[:parts] << part
        persist(message_id)
        part[:id]
      end
    end

    # Mark a tool part as completed with output.
    def complete_tool_part(message_id:, call_id:, output:)
      @mutex.synchronize do
        msg = @messages[message_id]
        return unless msg

        part = msg[:parts].find { |p| p[:type] == "tool" && p[:callID] == call_id }
        return unless part

        part[:state][:status] = "completed"
        part[:state][:output] = output
        part[:state][:time][:end] = now_ms
        persist(message_id)
      end
    end

    # Mark a tool part as errored.
    def error_tool_part(message_id:, call_id:, error:)
      @mutex.synchronize do
        msg = @messages[message_id]
        return unless msg

        part = msg[:parts].find { |p| p[:type] == "tool" && p[:callID] == call_id }
        return unless part

        part[:state][:status] = "error"
        part[:state][:error] = error.to_s
        part[:state][:time][:end] = now_ms
        persist(message_id)
      end
    end

    # Add a step-finish part to an assistant message.
    def add_step_finish(message_id:, tokens: nil)
      @mutex.synchronize do
        msg = @messages[message_id]
        return unless msg

        part = {
          id: next_part_id, sessionID: @session_id, messageID: message_id,
          type: "step-finish",
          reason: "stop",
          tokens: tokens || { input: 0, output: 0, reasoning: 0, cache: { read: 0, write: 0 } },
        }
        msg[:parts] << part
        persist(message_id)
      end
    end

    # ── Complete / update ────────────────────────────────────────────

    # Finalize an assistant message with token counts and completion time.
    def complete_assistant(message_id:, tokens: nil)
      @mutex.synchronize do
        msg = @messages[message_id]
        return unless msg

        msg[:info][:time][:completed] = now_ms
        if tokens
          msg[:info][:tokens] = {
            input: tokens[:input] || tokens[:total_input] || 0,
            output: tokens[:output] || tokens[:total_output] || 0,
            reasoning: tokens[:reasoning] || tokens[:total_reasoning] || 0,
            cache: tokens[:cache] || { read: 0, write: 0 },
          }
        end
        persist(message_id)
      end
    end

    # ── Queries ──────────────────────────────────────────────────────

    # All messages in insertion order.
    def messages
      @mutex.synchronize { @messages.values }
    end

    # Single message by ID, or nil.
    def message(id)
      @mutex.synchronize { @messages[id] }
    end

    # Number of stored messages.
    def count
      @mutex.synchronize { @messages.size }
    end

    private

    # ── ID generation (caller must hold @mutex) ──────────────────────

    def next_message_id
      @seq += 1
      format("msg_%04d", @seq)
    end

    def next_part_id
      @part_seq += 1
      format("prt_%04d", @part_seq)
    end

    # Milliseconds since the epoch, matching OpenCode timestamps.
    def now_ms
      (Time.now.to_f * 1000).to_i
    end

    # ── Persistence ──────────────────────────────────────────────────

    # Insert a message and write it to disk. Caller must hold @mutex.
    def store!(id, msg)
      @messages[id] = msg
      persist(id)
    end

    # Write one message to its JSON file. Caller must hold @mutex.
    def persist(id)
      FileUtils.mkdir_p(@dir)
      msg = @messages[id]
      return unless msg

      path = File.join(@dir, "#{id}.json")
      File.write(path, JSON.pretty_generate(msg))
    end

    # Load any existing message files from disk on init. A corrupt file
    # is skipped with a warning instead of aborting the whole session.
    def load_existing
      return unless File.directory?(@dir)

      Dir.glob(File.join(@dir, "msg_*.json")).sort.each do |path|
        begin
          data = JSON.parse(File.read(path), symbolize_names: true)
        rescue JSON::ParserError => e
          warn "brute: skipping corrupt message file #{path}: #{e.message}"
          next
        end

        id = data.dig(:info, :id)
        next unless id

        @messages[id] = data

        # Track sequence numbers so new IDs don't collide
        if (m = id.match(/\Amsg_(\d+)\z/))
          n = m[1].to_i
          @seq = n if n > @seq
        end

        # Track part sequences too
        (data[:parts] || []).each do |part|
          pid = part[:id]
          if pid.is_a?(String) && (m = pid.match(/\Aprt_(\d+)\z/))
            n = m[1].to_i
            @part_seq = n if n > @part_seq
          end
        end
      end
    end
  end
end
@@ -10,11 +10,12 @@ module Brute
10
10
  # rebuilds the context with the summary + recent messages.
11
11
  #
12
12
  class CompactionCheck < Base
13
- def initialize(app, compactor:, system_prompt:, tools:)
13
+ def initialize(app, compactor:, system_prompt:, tools:, stream: nil)
14
14
  super(app)
15
15
  @compactor = compactor
16
16
  @system_prompt = system_prompt
17
17
  @tools = tools
18
+ @stream = stream
18
19
  end
19
20
 
20
21
  def call(env)
@@ -43,7 +44,9 @@ module Brute
43
44
 
44
45
  def rebuild_context!(env, summary_text)
45
46
  provider = env[:provider]
46
- new_ctx = LLM::Context.new(provider, tools: @tools)
47
+ ctx_opts = { tools: @tools }
48
+ ctx_opts[:stream] = @stream if @stream
49
+ new_ctx = LLM::Context.new(provider, **ctx_opts)
47
50
  prompt = new_ctx.prompt do |p|
48
51
  p.system @system_prompt
49
52
  p.user "[Previous conversation summary]\n\n#{summary_text}"
@@ -0,0 +1,209 @@
1
# frozen_string_literal: true

module Brute
  module Middleware
    # Records every LLM exchange into a MessageStore in the OpenCode
    # {info, parts} format so sessions can be viewed later.
    #
    # Lifecycle per pipeline call:
    #
    #   1. PRE-CALL  — when env[:tool_results] is nil this is the first
    #      call of a turn, so the user message is recorded.
    #   2. POST-CALL — the assistant message is recorded: text content as
    #      a "text" part, each tool call as a "tool" part in "running"
    #      state.
    #   3. When the pipeline runs again with tool results, the matching
    #      tool parts are flipped to "completed" (or "error").
    #
    # The middleware also stores itself in env[:message_tracking] so the
    # orchestrator can access the current assistant message ID for callbacks.
    #
    class MessageTracking < Base
      attr_reader :store

      # The current assistant message ID (used by external callbacks).
      attr_reader :current_assistant_id

      def initialize(app, store:)
        super(app)
        @store = store
        @current_user_id = nil
        @current_assistant_id = nil
      end

      def call(env)
        env[:message_tracking] = self

        # Pre-call bookkeeping: a nil tool_results marks a fresh turn,
        # otherwise tool results are coming back from the previous call.
        if env[:tool_results].nil?
          record_user_message(env)
        else
          complete_tool_parts(env)
        end

        response = @app.call(env)

        # Post-call: persist what the assistant produced.
        record_assistant_message(env, response)

        response
      end

      private

      # ── User message ───────────────────────────────────────────────

      def record_user_message(env)
        prompt_text = extract_user_text(env)
        return unless prompt_text

        @current_user_id = @store.append_user(text: prompt_text)
      end

      def extract_user_text(env)
        payload = env[:input]

        return payload if payload.is_a?(String)

        if payload.is_a?(Array)
          # llm.rb prompt format: array of message hashes
          latest = payload.reverse_each.find { |m| m.respond_to?(:role) && m.role.to_s == "user" }
          return latest ? latest.content.to_s : nil
        end

        # Could be a prompt object — try to extract user content
        return nil unless payload.respond_to?(:messages)

        latest = payload.messages.to_a.reverse_each.find { |m| m.role.to_s == "user" }
        latest ? latest.content.to_s : nil
      end

      # ── Assistant message ──────────────────────────────────────────

      def record_assistant_message(env, response)
        vendor = env[:provider]&.class&.name&.split("::")&.last&.downcase
        model = resolve_model_name(env)

        @current_assistant_id = @store.append_assistant(
          parent_id: @current_user_id,
          model_id: model,
          provider_id: vendor,
        )

        # Text content
        body = safe_content(response)
        @store.add_text_part(message_id: @current_assistant_id, text: body) if body && !body.empty?

        # Tool calls
        record_tool_calls(env)

        # Token usage
        usage = extract_tokens(env, response)
        @store.complete_assistant(message_id: @current_assistant_id, tokens: usage) if usage

        # Step finish
        @store.add_step_finish(message_id: @current_assistant_id, tokens: usage)
      end

      def record_tool_calls(env)
        calls = env[:context].functions
        return if calls.nil? || calls.empty?

        calls.each do |fn_call|
          @store.add_tool_part(
            message_id: @current_assistant_id,
            tool: fn_call.name,
            call_id: fn_call.id,
            input: fn_call.arguments,
          )
        end
      end

      # ── Tool results ───────────────────────────────────────────────

      def complete_tool_parts(env)
        return unless @current_assistant_id

        results = env[:tool_results]
        return unless results.is_a?(Array)

        results.each do |tool_name, value|
          # Tool results arrive as [name, value] pairs; match each one to
          # the first still-running tool part with that name.
          record = @store.message(@current_assistant_id)
          next unless record

          part = record[:parts]&.find do |p|
            p[:type] == "tool" && p[:tool] == tool_name && p.dig(:state, :status) == "running"
          end
          next unless part

          if value.is_a?(Hash) && value[:error]
            @store.error_tool_part(
              message_id: @current_assistant_id,
              call_id: part[:callID],
              error: value[:error],
            )
          else
            @store.complete_tool_part(
              message_id: @current_assistant_id,
              call_id: part[:callID],
              output: value.is_a?(String) ? value : value.to_s,
            )
          end
        end
      end

      # ── Helpers ────────────────────────────────────────────────────

      # Resolve the actual model used for the request.
      # Prefers the model set on the LLM::Context (which respects user overrides)
      # and falls back to the provider's default_model.
      def resolve_model_name(env)
        ctx = env[:context]
        if ctx && ctx.instance_variable_defined?(:@params)
          override = ctx.instance_variable_get(:@params)&.dig(:model)
          return override.to_s if override
        end

        # Fall back to provider default
        env[:provider]&.respond_to?(:default_model) ? env[:provider].default_model.to_s : nil
      end

      def safe_content(response)
        return nil unless response.respond_to?(:content)
        response.content
      rescue NoMethodError
        nil
      end

      def extract_tokens(env, response)
        # Prefer the metadata accumulated by TokenTracking middleware
        last_call = env.dig(:metadata, :tokens, :last_call)
        if last_call
          {
            input: last_call[:input] || 0,
            output: last_call[:output] || 0,
            reasoning: 0,
            cache: { read: 0, write: 0 },
          }
        elsif response.respond_to?(:usage) && (usage = response.usage)
          {
            input: usage.input_tokens.to_i,
            output: usage.output_tokens.to_i,
            reasoning: usage.reasoning_tokens.to_i,
            cache: { read: 0, write: 0 },
          }
        end
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
# frozen_string_literal: true

module Brute
  module Middleware
    module OTel
      # Outermost OTel middleware. Creates a span per LLM pipeline call
      # and passes it through env[:span] for inner OTel middlewares to
      # decorate with events and attributes.
      #
      # When opentelemetry-sdk is not loaded, this is a pure pass-through.
      #
      # Pipeline position: outermost (wraps everything including retries).
      #
      #   use Brute::Middleware::OTel::Span
      #   use Brute::Middleware::OTel::ToolResults
      #   use Brute::Middleware::OTel::ToolCalls
      #   use Brute::Middleware::OTel::TokenUsage
      #   # ... existing middleware ...
      #   run Brute::Middleware::LLMCall.new
      #
      class Span < Base
        def call(env)
          return @app.call(env) unless defined?(::OpenTelemetry::SDK)

          provider_name = provider_type(env[:provider])
          ctx = env[:context]
          # respond_to? guard instead of a bare inline rescue: the old
          # `begin; ctx.model; rescue; nil; end` swallowed ALL
          # StandardErrors raised inside #model, hiding real bugs.
          model = ctx.respond_to?(:model) ? ctx.model : nil
          span_name = model ? "llm.call #{model}" : "llm.call"

          attributes = {
            "brute.provider" => provider_name,
            "brute.streaming" => !!env[:streaming],
            "brute.context_messages" => ctx.messages.to_a.size,
          }
          attributes["brute.model"] = model.to_s if model
          attributes["brute.session_id"] = env[:metadata][:session_id].to_s if env.dig(:metadata, :session_id)

          tracer.in_span(span_name, attributes: attributes, kind: :internal) do |span|
            env[:span] = span
            response = @app.call(env)

            # Record response model if it differs from request model
            resp_model = response.respond_to?(:model) ? response.model : nil
            span.set_attribute("brute.response_model", resp_model.to_s) if resp_model && resp_model != model

            response
          rescue ::StandardError => e
            span.record_exception(e)
            span.status = ::OpenTelemetry::Trace::Status.error(e.message)
            raise
          ensure
            env.delete(:span)
          end
        end

        private

        # Memoized tracer for the "brute" instrumentation scope.
        def tracer
          @tracer ||= ::OpenTelemetry.tracer_provider.tracer("brute", Brute::VERSION)
        end

        # Map a provider class name to a short vendor label.
        def provider_type(provider)
          name = provider.class.name.to_s.downcase
          if name.include?("anthropic") then "anthropic"
          elsif name.include?("openai") then "openai"
          elsif name.include?("google") || name.include?("gemini") then "google"
          elsif name.include?("deepseek") then "deepseek"
          elsif name.include?("ollama") then "ollama"
          elsif name.include?("xai") then "xai"
          else "unknown"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
# frozen_string_literal: true

module Brute
  module Middleware
    module OTel
      # Records token usage from the LLM response as span attributes.
      #
      # Runs POST-call: reads token counts from the response usage object
      # and sets them as attributes on the span.
      #
      class TokenUsage < Base
        def call(env)
          result = @app.call(env)
          annotate_span(env[:span], result)
          result
        end

        private

        # Copy the response's usage counters onto the active span, if any.
        def annotate_span(span, response)
          return unless span
          return unless response.respond_to?(:usage)

          usage = response.usage
          return unless usage

          span.set_attribute("gen_ai.usage.input_tokens", usage.input_tokens.to_i)
          span.set_attribute("gen_ai.usage.output_tokens", usage.output_tokens.to_i)
          span.set_attribute("gen_ai.usage.total_tokens", usage.total_tokens.to_i)

          reasoning = usage.reasoning_tokens.to_i
          span.set_attribute("gen_ai.usage.reasoning_tokens", reasoning) if reasoning > 0
        end
      end
    end
  end
end