braintrust 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/braintrust/trace/contrib/github.com/alexrudall/ruby-openai/ruby-openai.rb +304 -62
- data/lib/braintrust/trace/contrib/github.com/crmne/ruby_llm.rb +141 -33
- data/lib/braintrust/trace/contrib/openai.rb +38 -8
- data/lib/braintrust/trace/tokens.rb +10 -2
- data/lib/braintrust/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c6b2bcda06084f2e90d2602659ca71cf0ab574ac8c74c367890cbb2b04740529
+  data.tar.gz: 306b5a46660eae3d3e3811d021627883419a4dc4c114e51e40be64c590868c95
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1db7bf706b260762aa114eb5e8f844cb0567efd5a6f9d8cca03667111c0e89ff68f4e53b3a3adc6ad2192947602fc4b88a8e0057169ad7eff12ccb1c2ecb4951
+  data.tar.gz: bbc71c33bb28da124bd1cc61c8bf4f765ec2899a57bf604da624a04c42a7bfce508ed1eb78c4ec421da5038fbaf89086cee8d0b04998d223568df05e8640679f

data/lib/braintrust/trace/contrib/github.com/alexrudall/ruby-openai/ruby-openai.rb
CHANGED

@@ -28,6 +28,105 @@ module Braintrust
       Braintrust::Trace.parse_openai_usage_tokens(usage)
     end
 
+    # Aggregate streaming chunks into a single response structure
+    # @param chunks [Array<Hash>] array of chunk hashes from stream
+    # @return [Hash] aggregated response with choices, usage, id, created, model
+    def self.aggregate_streaming_chunks(chunks)
+      return {} if chunks.empty?
+
+      # Initialize aggregated structure
+      aggregated = {
+        "id" => nil,
+        "created" => nil,
+        "model" => nil,
+        "usage" => nil,
+        "choices" => []
+      }
+
+      # Track aggregated content for the first choice
+      role = nil
+      content = +""
+
+      chunks.each do |chunk|
+        # Capture top-level fields from any chunk that has them
+        aggregated["id"] ||= chunk["id"]
+        aggregated["created"] ||= chunk["created"]
+        aggregated["model"] ||= chunk["model"]
+
+        # Aggregate usage (usually only in last chunk if stream_options.include_usage is set)
+        aggregated["usage"] = chunk["usage"] if chunk["usage"]
+
+        # Aggregate content from first choice
+        if chunk.dig("choices", 0, "delta", "role")
+          role ||= chunk.dig("choices", 0, "delta", "role")
+        end
+        if chunk.dig("choices", 0, "delta", "content")
+          content << chunk.dig("choices", 0, "delta", "content")
+        end
+      end
+
+      # Build aggregated choices array
+      aggregated["choices"] = [
+        {
+          "index" => 0,
+          "message" => {
+            "role" => role || "assistant",
+            "content" => content
+          },
+          "finish_reason" => chunks.dig(-1, "choices", 0, "finish_reason")
+        }
+      ]
+
+      aggregated
+    end
+
+    # Aggregate responses streaming chunks into a single response structure
+    # @param chunks [Array<Hash>] array of chunk hashes from stream
+    # @return [Hash] aggregated response with output, usage, id
+    def self.aggregate_responses_chunks(chunks)
+      return {} if chunks.empty?
+
+      # Find the response.completed event which has the final response
+      completed_chunk = chunks.find { |c| c["type"] == "response.completed" }
+
+      if completed_chunk && completed_chunk["response"]
+        response = completed_chunk["response"]
+        return {
+          "id" => response["id"],
+          "output" => response["output"],
+          "usage" => response["usage"]
+        }
+      end
+
+      # Fallback if no completed event found
+      {}
+    end
+
+    # Set span attributes from response data (works for both streaming and non-streaming)
+    # @param span [OpenTelemetry::Trace::Span] the span to set attributes on
+    # @param response_data [Hash] response hash with keys: choices, usage, id, created, model, system_fingerprint, service_tier
+    # @param time_to_first_token [Float] time to first token in seconds
+    # @param metadata [Hash] metadata hash to update with response fields
+    def self.set_span_attributes(span, response_data, time_to_first_token, metadata)
+      # Set output (choices) as JSON
+      if response_data["choices"]&.any?
+        set_json_attr(span, "braintrust.output_json", response_data["choices"])
+      end
+
+      # Set metrics (token usage + time_to_first_token)
+      metrics = {}
+      if response_data["usage"]
+        metrics = parse_usage_tokens(response_data["usage"])
+      end
+      metrics["time_to_first_token"] = time_to_first_token || 0.0
+      set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
+
+      # Update metadata with response fields
+      %w[id created model system_fingerprint service_tier].each do |field|
+        metadata[field] = response_data[field] if response_data[field]
+      end
+    end
+
     # Wrap an OpenAI::Client (ruby-openai gem) to automatically create spans
     # Supports both synchronous and streaming requests
     # @param client [OpenAI::Client] the OpenAI client to wrap
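For reference, a minimal sketch of what the new aggregation helper produces. The chunk hashes below are illustrative Chat Completions stream events, and the fully qualified module path is an assumption inferred from the contrib file's location (the code above only refers to itself as RubyOpenAI):

require "braintrust"

# Assumed namespace, mirroring the contrib file path above
aggregator = Braintrust::Trace::Contrib::Github::Alexrudall::RubyOpenAI

# Illustrative stream chunks: two content deltas, usage on the final chunk
chunks = [
  {"id" => "chatcmpl-1", "created" => 1736000000, "model" => "gpt-4o-mini",
   "choices" => [{"index" => 0, "delta" => {"role" => "assistant", "content" => "Hel"}}]},
  {"id" => "chatcmpl-1",
   "choices" => [{"index" => 0, "delta" => {"content" => "lo!"}, "finish_reason" => "stop"}],
   "usage" => {"prompt_tokens" => 5, "completion_tokens" => 2, "total_tokens" => 7}}
]

aggregator.aggregate_streaming_chunks(chunks)
# => {"id" => "chatcmpl-1", "created" => 1736000000, "model" => "gpt-4o-mini",
#     "usage" => {"prompt_tokens" => 5, "completion_tokens" => 2, "total_tokens" => 7},
#     "choices" => [{"index" => 0,
#                    "message" => {"role" => "assistant", "content" => "Hello!"},
#                    "finish_reason" => "stop"}]}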
@@ -35,92 +134,235 @@ module Braintrust
     def self.wrap(client, tracer_provider: nil)
       tracer_provider ||= ::OpenTelemetry.tracer_provider
 
+      # Store tracer provider on the client for use by wrapper modules
+      client.instance_variable_set(:@braintrust_tracer_provider, tracer_provider)
+
       # Wrap chat completions
-      wrap_chat(client
+      wrap_chat(client)
+
+      # Wrap responses API if available
+      wrap_responses(client) if client.respond_to?(:responses)
 
       client
     end
 
     # Wrap chat API
     # @param client [OpenAI::Client] the OpenAI client
-    … (old lines 46-76 not captured)
+    def self.wrap_chat(client)
+      client.singleton_class.prepend(ChatWrapper)
+    end
+
+    # Wrap responses API
+    # @param client [OpenAI::Client] the OpenAI client
+    def self.wrap_responses(client)
+      # Store tracer provider on the responses object for use by wrapper module
+      responses_obj = client.responses
+      responses_obj.instance_variable_set(:@braintrust_tracer_provider, client.instance_variable_get(:@braintrust_tracer_provider))
+      responses_obj.singleton_class.prepend(ResponsesCreateWrapper)
+    end
+
+    # Wrapper module for chat completions
+    module ChatWrapper
+      def chat(parameters:)
+        tracer_provider = @braintrust_tracer_provider
+        tracer = tracer_provider.tracer("braintrust")
+
+        tracer.in_span("Chat Completion") do |span|
+          # Track start time for time_to_first_token
+          start_time = Time.now
+          time_to_first_token = nil
+          is_streaming = parameters.key?(:stream) && parameters[:stream].is_a?(Proc)
+
+          # Initialize metadata hash
+          metadata = {
+            "provider" => "openai",
+            "endpoint" => "/v1/chat/completions"
+          }
+
+          # Capture request metadata fields
+          metadata_fields = %w[
+            model frequency_penalty logit_bias logprobs max_tokens n
+            presence_penalty response_format seed service_tier stop
+            stream stream_options temperature top_p top_logprobs
+            tools tool_choice parallel_tool_calls user functions function_call
+          ]
+
+          metadata_fields.each do |field|
+            field_sym = field.to_sym
+            if parameters.key?(field_sym)
+              # Special handling for stream parameter (it's a Proc)
+              metadata[field] = if field == "stream"
+                true # Just mark as streaming
+              else
+                parameters[field_sym]
               end
             end
+          end
+
+          # Set input messages as JSON
+          if parameters[:messages]
+            RubyOpenAI.set_json_attr(span, "braintrust.input_json", parameters[:messages])
+          end
 
-          … (old lines 80-82 not captured)
+          # Wrap streaming callback if present to capture time to first token and aggregate chunks
+          aggregated_chunks = []
+          if is_streaming
+            original_stream_proc = parameters[:stream]
+            parameters = parameters.dup
+            parameters[:stream] = proc do |chunk, bytesize|
+              # Capture time to first token on first chunk
+              time_to_first_token ||= Time.now - start_time
+              # Aggregate chunks for later processing
+              aggregated_chunks << chunk
+              # Call original callback
+              original_stream_proc.call(chunk, bytesize)
             end
+          end
+
+          begin
+            # Call the original method
+            response = super(parameters: parameters)
+
+            # Calculate time to first token for non-streaming
+            time_to_first_token ||= Time.now - start_time unless is_streaming
+
+            # Process response data
+            if is_streaming && !aggregated_chunks.empty?
+              # Aggregate streaming chunks into response-like structure
+              aggregated_response = RubyOpenAI.aggregate_streaming_chunks(aggregated_chunks)
+              RubyOpenAI.set_span_attributes(span, aggregated_response, time_to_first_token, metadata)
+            else
+              # Non-streaming: use response object directly
+              RubyOpenAI.set_span_attributes(span, response || {}, time_to_first_token, metadata)
+            end
+
+            # Set metadata ONCE at the end with complete hash
+            RubyOpenAI.set_json_attr(span, "braintrust.metadata", metadata)
 
-          … (old lines 85-87 not captured)
+            response
+          rescue => e
+            # Record exception in span
+            span.record_exception(e)
+            span.status = OpenTelemetry::Trace::Status.error("Exception: #{e.class} - #{e.message}")
+            raise
+          end
+        end
+      end
+    end
+
+    # Wrapper module for responses API create method
+    module ResponsesCreateWrapper
+      def create(parameters:)
+        tracer_provider = @braintrust_tracer_provider
+        tracer = tracer_provider.tracer("braintrust")
 
-        … (old lines 89-91 not captured)
+        tracer.in_span("openai.responses.create") do |span|
+          # Track start time for time_to_first_token
+          start_time = Time.now
+          time_to_first_token = nil
+          is_streaming = parameters.key?(:stream) && parameters[:stream].is_a?(Proc)
+
+          # Initialize metadata hash
+          metadata = {
+            "provider" => "openai",
+            "endpoint" => "/v1/responses"
+          }
+
+          # Capture request metadata fields
+          metadata_fields = %w[
+            model instructions modalities tools parallel_tool_calls
+            tool_choice temperature max_tokens top_p frequency_penalty
+            presence_penalty seed user store response_format
+            reasoning previous_response_id truncation
+          ]
+
+          metadata_fields.each do |field|
+            field_sym = field.to_sym
+            if parameters.key?(field_sym)
+              metadata[field] = parameters[field_sym]
+            end
+          end
+
+          # Mark as streaming if applicable
+          metadata["stream"] = true if is_streaming
+
+          # Set input as JSON
+          if parameters[:input]
+            RubyOpenAI.set_json_attr(span, "braintrust.input_json", parameters[:input])
+          end
+
+          # Wrap streaming callback if present to capture time to first token and aggregate chunks
+          aggregated_chunks = []
+          if is_streaming
+            original_stream_proc = parameters[:stream]
+            parameters = parameters.dup
+            parameters[:stream] = proc do |chunk, event|
+              # Capture time to first token on first chunk
+              time_to_first_token ||= Time.now - start_time
+              # Aggregate chunks for later processing
+              aggregated_chunks << chunk
+              # Call original callback
+              original_stream_proc.call(chunk, event)
+            end
+          end
+
+          begin
+            # Call the original method
+            response = super(parameters: parameters)
+
+            # Calculate time to first token for non-streaming
+            time_to_first_token ||= Time.now - start_time unless is_streaming
+
+            # Process response data
+            if is_streaming && !aggregated_chunks.empty?
+              # Aggregate streaming chunks into response-like structure
+              aggregated_response = RubyOpenAI.aggregate_responses_chunks(aggregated_chunks)
+
+              # Set output as JSON
+              if aggregated_response["output"]
+                RubyOpenAI.set_json_attr(span, "braintrust.output_json", aggregated_response["output"])
               end
 
-              # Set metrics (token usage)
-              … (old lines 95-97 not captured)
+              # Set metrics (token usage + time_to_first_token)
+              metrics = {}
+              if aggregated_response["usage"]
+                metrics = RubyOpenAI.parse_usage_tokens(aggregated_response["usage"])
               end
+              metrics["time_to_first_token"] = time_to_first_token || 0.0
+              RubyOpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
 
-              #
-              if
-              … (old lines 102-105 not captured)
+              # Update metadata with response fields
+              metadata["id"] = aggregated_response["id"] if aggregated_response["id"]
+            else
+              # Non-streaming: use response object directly
+              if response && response["output"]
+                RubyOpenAI.set_json_attr(span, "braintrust.output_json", response["output"])
               end
 
-              # Set
-              … (old line 109 not captured)
+              # Set metrics (token usage + time_to_first_token)
+              metrics = {}
+              if response && response["usage"]
+                metrics = RubyOpenAI.parse_usage_tokens(response["usage"])
+              end
+              metrics["time_to_first_token"] = time_to_first_token || 0.0
+              RubyOpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
 
-            response
-            … (old line 112 not captured)
-            # Record exception in span
-            span.record_exception(e)
-            span.status = OpenTelemetry::Trace::Status.error("Exception: #{e.class} - #{e.message}")
-            raise
+              # Update metadata with response fields
+              metadata["id"] = response["id"] if response && response["id"]
             end
+
+            # Set metadata ONCE at the end with complete hash
+            RubyOpenAI.set_json_attr(span, "braintrust.metadata", metadata)
+
+            response
+          rescue => e
+            # Record exception in span
+            span.record_exception(e)
+            span.status = OpenTelemetry::Trace::Status.error("Exception: #{e.class} - #{e.message}")
+            raise
           end
         end
       end
-      … (old line 121 not captured)
-      # Prepend the wrapper to the client's singleton class
-      client.singleton_class.prepend(wrapper)
     end
   end
 end
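Usage stays the same after wrapping, since ChatWrapper is prepended onto the client's singleton class. A hedged sketch (same assumed module path as above; the model name and prompt are placeholders):

require "openai"      # alexrudall/ruby-openai
require "braintrust"

client = OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"])
Braintrust::Trace::Contrib::Github::Alexrudall::RubyOpenAI.wrap(client)  # assumed namespace

# For streaming, the wrapper dups `parameters`, substitutes its own proc to
# record time_to_first_token and collect chunks, then forwards each chunk here.
client.chat(
  parameters: {
    model: "gpt-4o-mini",
    messages: [{role: "user", content: "Say hi"}],
    stream: proc { |chunk, _bytesize| print chunk.dig("choices", 0, "delta", "content") },
    stream_options: {include_usage: true}  # lets the final chunk carry usage for metrics
  }
)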

data/lib/braintrust/trace/contrib/github.com/crmne/ruby_llm.rb
CHANGED

@@ -71,15 +71,59 @@ module Braintrust
     # Check if already wrapped to make this idempotent
     return chat if chat.instance_variable_get(:@braintrust_wrapped)
 
-    # Create a wrapper module that intercepts chat.
+    # Create a wrapper module that intercepts chat.complete
     wrapper = create_wrapper_module(tracer_provider)
 
     # Mark as wrapped and prepend the wrapper to the chat instance
     chat.instance_variable_set(:@braintrust_wrapped, true)
     chat.singleton_class.prepend(wrapper)
+
+    # Register tool callbacks for tool span creation
+    register_tool_callbacks(chat, tracer_provider)
+
     chat
   end
 
+  # Register callbacks for tool execution tracing
+  # @param chat [RubyLLM::Chat] the chat instance
+  # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider
+  def self.register_tool_callbacks(chat, tracer_provider)
+    tracer = tracer_provider.tracer("braintrust")
+
+    # Track tool spans by tool_call_id
+    tool_spans = {}
+
+    # Start tool span when tool is called
+    chat.on_tool_call do |tool_call|
+      span = tracer.start_span("ruby_llm.tool.#{tool_call.name}")
+      set_json_attr(span, "braintrust.span_attributes", {type: "tool"})
+      span.set_attribute("tool.name", tool_call.name)
+      span.set_attribute("tool.call_id", tool_call.id)
+
+      # Store tool input
+      input = {
+        "name" => tool_call.name,
+        "arguments" => tool_call.arguments
+      }
+      set_json_attr(span, "braintrust.input_json", input)
+
+      tool_spans[tool_call.id] = span
+    end
+
+    # End tool span when result is received
+    chat.on_tool_result do |result|
+      # Find the most recent tool span (RubyLLM doesn't pass tool_call_id to on_tool_result)
+      # The spans are processed in order, so we can use the first unfinished one
+      tool_call_id, span = tool_spans.find { |_id, s| s }
+      if span
+        # Store tool output
+        set_json_attr(span, "braintrust.output_json", result)
+        span.finish
+        tool_spans.delete(tool_call_id)
+      end
+    end
+  end
+
   # Unwrap RubyLLM to remove Braintrust tracing
   # For class-level unwrapping, removes the initialize override from the wrapper module
   # For instance-level unwrapping, clears the wrapped flag
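A sketch of how the new tool callbacks surface in practice. The Weather tool is hypothetical, the RubyLLM tool DSL is recalled from that gem's docs rather than from this diff, and wrap(chat) is an assumed entry point (the hunk above only shows its body):

require "ruby_llm"
require "braintrust"

# Hypothetical tool using RubyLLM's Tool DSL
class Weather < RubyLLM::Tool
  description "Look up current weather for a city"
  param :city, desc: "City name"

  def execute(city:)
    {city: city, temp_c: 21}  # stubbed result
  end
end

chat = RubyLLM.chat(model: "gpt-4o-mini").with_tool(Weather)
Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat)  # assumed entry point

# The LLM call gets one "ruby_llm.chat" span; each tool execution gets its own
# "ruby_llm.tool.weather" span via the on_tool_call/on_tool_result hooks above.
chat.ask("What's the weather in Paris?")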
@@ -116,50 +160,75 @@ module Braintrust
     ::RubyLLM::Chat.prepend(wrapper)
   end
 
-  # Create the wrapper module that intercepts chat.
+  # Create the wrapper module that intercepts chat.complete
+  # We wrap complete() instead of ask() because:
+  # - ask() internally calls complete() for the actual API call
+  # - ActiveRecord integration (acts_as_chat) calls complete() directly
+  # - This ensures all LLM calls are traced regardless of entry point
+  #
+  # Important: RubyLLM's complete() calls itself recursively for tool execution.
+  # We only create a span for the outermost call to avoid duplicate spans.
+  # Tool execution is traced separately via on_tool_call/on_tool_result callbacks.
+  #
   # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider
   # @return [Module] the wrapper module
   def self.create_wrapper_module(tracer_provider)
     Module.new do
-      define_method(:
+      define_method(:complete) do |&block|
+        # Check if we're already inside a traced complete() call
+        # If so, just call super without creating a new span
+        if @braintrust_in_complete
+          if block
+            return super(&block)
+          else
+            return super()
+          end
+        end
+
         tracer = tracer_provider.tracer("braintrust")
 
-        … (old lines 127-135 not captured)
+        # Mark that we're inside a complete() call
+        @braintrust_in_complete = true
+
+        begin
+          if block
+            # Handle streaming request
+            wrapped_block = proc do |chunk|
+              block.call(chunk)
+            end
+            Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.handle_streaming_complete(self, tracer, block) do |aggregated_chunks|
+              super(&proc do |chunk|
+                aggregated_chunks << chunk
+                wrapped_block.call(chunk)
+              end)
+            end
+          else
+            # Handle non-streaming request
+            Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.handle_non_streaming_complete(self, tracer) do
+              super()
             end
           end
-        … (old lines 138-139 not captured)
-        Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.handle_non_streaming_ask(self, tracer, prompt, params) do
-          super(prompt, **params)
-        end
+        ensure
+          @braintrust_in_complete = false
         end
       end
     end
   end
 
-  # Handle streaming
+  # Handle streaming complete request with tracing
   # @param chat [RubyLLM::Chat] the chat instance
   # @param tracer [OpenTelemetry::Trace::Tracer] the tracer
-  # @param prompt [String, nil] the user prompt
-  # @param params [Hash] additional parameters
   # @param block [Proc] the streaming block
-  def self.
+  def self.handle_streaming_complete(chat, tracer, block)
     # Start span immediately for accurate timing
-    span = tracer.start_span("ruby_llm.chat
+    span = tracer.start_span("ruby_llm.chat")
 
     aggregated_chunks = []
 
     # Extract metadata and build input messages
+    # For complete(), messages are already in chat history (no prompt param)
     metadata = extract_metadata(chat, stream: true)
-    input_messages = build_input_messages(chat,
+    input_messages = build_input_messages(chat, nil)
 
     # Set input and metadata
     set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any?
@@ -181,19 +250,18 @@ module Braintrust
     result
   end
 
-  # Handle non-streaming
+  # Handle non-streaming complete request with tracing
   # @param chat [RubyLLM::Chat] the chat instance
   # @param tracer [OpenTelemetry::Trace::Tracer] the tracer
-  … (old line 187 not captured)
-  # @param params [Hash] additional parameters
-  def self.handle_non_streaming_ask(chat, tracer, prompt, params)
+  def self.handle_non_streaming_complete(chat, tracer)
     # Start span immediately for accurate timing
-    span = tracer.start_span("ruby_llm.chat
+    span = tracer.start_span("ruby_llm.chat")
 
     begin
       # Extract metadata and build input messages
+      # For complete(), messages are already in chat history (no prompt param)
      metadata = extract_metadata(chat)
-      input_messages = build_input_messages(chat,
+      input_messages = build_input_messages(chat, nil)
       set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any?
 
       # Remember message count before the call (for tool call detection)
@@ -321,23 +389,62 @@ module Braintrust
   end
 
   # Build input messages array from chat history and prompt
+  # Formats messages to match OpenAI's message format
   # @param chat [RubyLLM::Chat] the chat instance
   # @param prompt [String, nil] the user prompt
   # @return [Array<Hash>] array of message hashes
   def self.build_input_messages(chat, prompt)
     input_messages = []
 
-    # Add conversation history
+    # Add conversation history, formatting each message to OpenAI format
     if chat.respond_to?(:messages) && chat.messages&.any?
-      input_messages = chat.messages.map { |m| m
+      input_messages = chat.messages.map { |m| format_message_for_input(m) }
     end
 
     # Add current prompt
-    input_messages << {role
+    input_messages << {"role" => "user", "content" => prompt} if prompt
 
     input_messages
   end
 
+  # Format a RubyLLM message to OpenAI-compatible format
+  # @param msg [Object] the RubyLLM message
+  # @return [Hash] OpenAI-formatted message
+  def self.format_message_for_input(msg)
+    formatted = {
+      "role" => msg.role.to_s
+    }
+
+    # Handle content
+    if msg.respond_to?(:content) && msg.content
+      # Convert Ruby hash notation to JSON string for tool results
+      content = msg.content
+      if msg.role.to_s == "tool" && content.is_a?(String) && content.start_with?("{:")
+        # Ruby hash string like "{:location=>...}" - try to parse and re-serialize as JSON
+        begin
+          # Simple conversion: replace Ruby hash syntax with JSON
+          content = content.gsub(/(?<=\{|, ):(\w+)=>/, '"\1":').gsub("=>", ":")
+        rescue
+          # Keep original if conversion fails
+        end
+      end
+      formatted["content"] = content
+    end
+
+    # Handle tool_calls for assistant messages
+    if msg.respond_to?(:tool_calls) && msg.tool_calls&.any?
+      formatted["tool_calls"] = format_tool_calls(msg.tool_calls)
+      formatted["content"] = nil
+    end
+
+    # Handle tool_call_id for tool result messages
+    if msg.respond_to?(:tool_call_id) && msg.tool_call_id
+      formatted["tool_call_id"] = msg.tool_call_id
+    end
+
+    formatted
+  end
+
   # Capture streaming output and metrics
   # @param span [OpenTelemetry::Trace::Span] the span
   # @param aggregated_chunks [Array] the aggregated chunks
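The tool-result rewrite in format_message_for_input is a best-effort string substitution rather than a real parser. On a typical inspect-style hash string (this sample is illustrative) it behaves like so:

content = '{:location=>"Paris", :unit=>"celsius", :temp=>21}'
content.gsub(/(?<=\{|, ):(\w+)=>/, '"\1":').gsub("=>", ":")
# => '{"location":"Paris", "unit":"celsius", "temp":21}'

Strings that don't start with "{:" are passed through untouched, so only tool results that look like inspected Ruby hashes are rewritten.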
@@ -383,8 +490,9 @@ module Braintrust
     end
 
     # Check if there are tool calls in the messages history
+    # Look at messages added during this complete() call
     if chat.respond_to?(:messages) && chat.messages
-      assistant_msg = chat.messages[
+      assistant_msg = chat.messages[messages_before_count..].find { |m|
         m.role.to_s == "assistant" && m.respond_to?(:tool_calls) && m.tool_calls&.any?
       }
 

data/lib/braintrust/trace/contrib/openai.rb
CHANGED

@@ -155,7 +155,10 @@ module Braintrust
       define_method(:create) do |**params|
         tracer = tracer_provider.tracer("braintrust")
 
-        tracer.in_span("
+        tracer.in_span("Chat Completion") do |span|
+          # Track start time for time_to_first_token
+          start_time = Time.now
+
           # Initialize metadata hash
           metadata = {
             "provider" => "openai",

@@ -184,6 +187,9 @@ module Braintrust
           # Call the original method
           response = super(**params)
 
+          # Calculate time to first token
+          time_to_first_token = Time.now - start_time
+
           # Set output (choices) as JSON
           # Use to_h to get the raw structure with all fields (including tool_calls)
           if response.respond_to?(:choices) && response.choices&.any?

@@ -192,10 +198,13 @@ module Braintrust
           end
 
           # Set metrics (token usage with advanced details)
+          metrics = {}
           if response.respond_to?(:usage) && response.usage
             metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(response.usage)
-            span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty?
           end
+          # Add time_to_first_token metric
+          metrics["time_to_first_token"] = time_to_first_token
+          span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty?
 
           # Add response metadata fields
           metadata["id"] = response.id if response.respond_to?(:id) && response.id

@@ -214,13 +223,15 @@ module Braintrust
       define_method(:stream_raw) do |**params|
         tracer = tracer_provider.tracer("braintrust")
         aggregated_chunks = []
+        start_time = Time.now
+        time_to_first_token = nil
         metadata = {
           "provider" => "openai",
           "endpoint" => "/v1/chat/completions"
         }
 
         # Start span with proper context (will be child of current span if any)
-        span = tracer.start_span("
+        span = tracer.start_span("Chat Completion")
 
         # Capture request metadata fields
         metadata_fields = %i[

@@ -259,6 +270,8 @@ module Braintrust
         original_each = stream.method(:each)
         stream.define_singleton_method(:each) do |&block|
           original_each.call do |chunk|
+            # Capture time to first token on first chunk
+            time_to_first_token ||= Time.now - start_time
             aggregated_chunks << chunk.to_h
             block&.call(chunk)
           end

@@ -275,10 +288,13 @@ module Braintrust
           Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.output_json", aggregated_output[:choices])
 
           # Set metrics if usage is included (requires stream_options.include_usage)
+          metrics = {}
           if aggregated_output[:usage]
             metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(aggregated_output[:usage])
-            Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
           end
+          # Add time_to_first_token metric
+          metrics["time_to_first_token"] = time_to_first_token || 0.0
+          Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
 
           # Update metadata with response fields
           metadata["id"] = aggregated_output[:id] if aggregated_output[:id]

@@ -297,13 +313,15 @@ module Braintrust
       # Wrap stream for streaming chat completions (returns ChatCompletionStream with convenience methods)
       define_method(:stream) do |**params|
         tracer = tracer_provider.tracer("braintrust")
+        start_time = Time.now
+        time_to_first_token = nil
         metadata = {
           "provider" => "openai",
           "endpoint" => "/v1/chat/completions"
         }
 
         # Start span with proper context (will be child of current span if any)
-        span = tracer.start_span("
+        span = tracer.start_span("Chat Completion")
 
         # Capture request metadata fields
         metadata_fields = %i[

@@ -354,10 +372,13 @@ module Braintrust
           end
 
           # Set metrics if usage is available
+          metrics = {}
           if snapshot.usage
             metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(snapshot.usage)
-            set_json_attr.call("braintrust.metrics", metrics) unless metrics.empty?
           end
+          # Add time_to_first_token metric
+          metrics["time_to_first_token"] = time_to_first_token || 0.0
+          set_json_attr.call("braintrust.metrics", metrics) unless metrics.empty?
 
           # Update metadata with response fields
           metadata["id"] = snapshot.id if snapshot.respond_to?(:id) && snapshot.id

@@ -378,7 +399,11 @@ module Braintrust
         # Wrap .each() method - this is the core consumption method
         original_each = stream.method(:each)
         stream.define_singleton_method(:each) do |&block|
-          original_each.call
+          original_each.call do |chunk|
+            # Capture time to first token on first chunk
+            time_to_first_token ||= Time.now - start_time
+            block&.call(chunk)
+          end
         rescue => e
           span.record_exception(e)
           span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}")

@@ -392,8 +417,13 @@ module Braintrust
         stream.define_singleton_method(:text) do
           text_enum = original_text.call
           # Wrap the returned enumerable's .each method
+          original_text_each = text_enum.method(:each)
           text_enum.define_singleton_method(:each) do |&block|
-            … (old line 396 not captured)
+            original_text_each.call do |delta|
+              # Capture time to first token on first delta
+              time_to_first_token ||= Time.now - start_time
+              block&.call(delta)
+            end
           rescue => e
             span.record_exception(e)
             span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}")
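These hunks all touch the wrapper for the official openai gem. A hedged usage sketch of the streaming path; Braintrust::Trace::OpenAI.wrap is assumed as the entry point based on the fully qualified helper calls above:

require "openai"      # official openai gem
require "braintrust"

client = OpenAI::Client.new
Braintrust::Trace::OpenAI.wrap(client)  # assumed entry point

# time_to_first_token is recorded when the wrapped .each yields its first chunk;
# usage metrics still require stream_options: {include_usage: true}.
stream = client.chat.completions.stream_raw(
  model: "gpt-4o-mini",
  messages: [{role: "user", content: "Say hi"}],
  stream_options: {include_usage: true}
)
stream.each { |chunk| print chunk.to_h.dig(:choices, 0, :delta, :content) }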

data/lib/braintrust/trace/tokens.rb
CHANGED

@@ -14,16 +14,24 @@ module Braintrust
     return metrics unless usage_hash.is_a?(Hash)
 
     # Field mappings: OpenAI → Braintrust
+    # Supports both Chat Completions API (prompt_tokens, completion_tokens)
+    # and Responses API (input_tokens, output_tokens)
     field_map = {
       "prompt_tokens" => "prompt_tokens",
       "completion_tokens" => "completion_tokens",
-      "total_tokens" => "tokens"
+      "total_tokens" => "tokens",
+      # Responses API uses different field names
+      "input_tokens" => "prompt_tokens",
+      "output_tokens" => "completion_tokens"
     }
 
     # Prefix mappings for *_tokens_details
     prefix_map = {
       "prompt" => "prompt",
-      "completion" => "completion"
+      "completion" => "completion",
+      # Responses API uses input/output prefixes
+      "input" => "prompt",
+      "output" => "completion"
     }
 
     usage_hash.each do |key, value|
data/lib/braintrust/version.rb
CHANGED