braintrust 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c6b2bcda06084f2e90d2602659ca71cf0ab574ac8c74c367890cbb2b04740529
+  data.tar.gz: 306b5a46660eae3d3e3811d021627883419a4dc4c114e51e40be64c590868c95
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1db7bf706b260762aa114eb5e8f844cb0567efd5a6f9d8cca03667111c0e89ff68f4e53b3a3adc6ad2192947602fc4b88a8e0057169ad7eff12ccb1c2ecb4951
+  data.tar.gz: bbc71c33bb28da124bd1cc61c8bf4f765ec2899a57bf604da624a04c42a7bfce508ed1eb78c4ec421da5038fbaf89086cee8d0b04998d223568df05e8640679f
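These digests cover the two member files inside the published .gem archive. After fetching and unpacking the artifact (for example, gem fetch braintrust --version 0.0.8 followed by tar xf braintrust-0.0.8.gem), the new SHA256 values can be checked with Ruby's standard library. A minimal sketch; the local file paths are assumptions:

require "digest"

# Expected digests, copied from the new checksums.yaml above
{
  "metadata.gz" => "c6b2bcda06084f2e90d2602659ca71cf0ab574ac8c74c367890cbb2b04740529",
  "data.tar.gz" => "306b5a46660eae3d3e3811d021627883419a4dc4c114e51e40be64c590868c95"
}.each do |file, expected|
  actual = Digest::SHA256.file(file).hexdigest
  puts "#{file}: #{actual == expected ? 'OK' : 'MISMATCH'}"
end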
(Ruby source file; header line not captured in this export)
CHANGED

@@ -28,6 +28,105 @@ module Braintrust
       Braintrust::Trace.parse_openai_usage_tokens(usage)
     end
 
+    # Aggregate streaming chunks into a single response structure
+    # @param chunks [Array<Hash>] array of chunk hashes from stream
+    # @return [Hash] aggregated response with choices, usage, id, created, model
+    def self.aggregate_streaming_chunks(chunks)
+      return {} if chunks.empty?
+
+      # Initialize aggregated structure
+      aggregated = {
+        "id" => nil,
+        "created" => nil,
+        "model" => nil,
+        "usage" => nil,
+        "choices" => []
+      }
+
+      # Track aggregated content for the first choice
+      role = nil
+      content = +""
+
+      chunks.each do |chunk|
+        # Capture top-level fields from any chunk that has them
+        aggregated["id"] ||= chunk["id"]
+        aggregated["created"] ||= chunk["created"]
+        aggregated["model"] ||= chunk["model"]
+
+        # Aggregate usage (usually only in last chunk if stream_options.include_usage is set)
+        aggregated["usage"] = chunk["usage"] if chunk["usage"]
+
+        # Aggregate content from first choice
+        if chunk.dig("choices", 0, "delta", "role")
+          role ||= chunk.dig("choices", 0, "delta", "role")
+        end
+        if chunk.dig("choices", 0, "delta", "content")
+          content << chunk.dig("choices", 0, "delta", "content")
+        end
+      end
+
+      # Build aggregated choices array
+      aggregated["choices"] = [
+        {
+          "index" => 0,
+          "message" => {
+            "role" => role || "assistant",
+            "content" => content
+          },
+          "finish_reason" => chunks.dig(-1, "choices", 0, "finish_reason")
+        }
+      ]
+
+      aggregated
+    end
+
+    # Aggregate responses streaming chunks into a single response structure
+    # @param chunks [Array<Hash>] array of chunk hashes from stream
+    # @return [Hash] aggregated response with output, usage, id
+    def self.aggregate_responses_chunks(chunks)
+      return {} if chunks.empty?
+
+      # Find the response.completed event which has the final response
+      completed_chunk = chunks.find { |c| c["type"] == "response.completed" }
+
+      if completed_chunk && completed_chunk["response"]
+        response = completed_chunk["response"]
+        return {
+          "id" => response["id"],
+          "output" => response["output"],
+          "usage" => response["usage"]
+        }
+      end
+
+      # Fallback if no completed event found
+      {}
+    end
+
+    # Set span attributes from response data (works for both streaming and non-streaming)
+    # @param span [OpenTelemetry::Trace::Span] the span to set attributes on
+    # @param response_data [Hash] response hash with keys: choices, usage, id, created, model, system_fingerprint, service_tier
+    # @param time_to_first_token [Float] time to first token in seconds
+    # @param metadata [Hash] metadata hash to update with response fields
+    def self.set_span_attributes(span, response_data, time_to_first_token, metadata)
+      # Set output (choices) as JSON
+      if response_data["choices"]&.any?
+        set_json_attr(span, "braintrust.output_json", response_data["choices"])
+      end
+
+      # Set metrics (token usage + time_to_first_token)
+      metrics = {}
+      if response_data["usage"]
+        metrics = parse_usage_tokens(response_data["usage"])
+      end
+      metrics["time_to_first_token"] = time_to_first_token || 0.0
+      set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
+
+      # Update metadata with response fields
+      %w[id created model system_fingerprint service_tier].each do |field|
+        metadata[field] = response_data[field] if response_data[field]
+      end
+    end
+
     # Wrap an OpenAI::Client (ruby-openai gem) to automatically create spans
     # Supports both synchronous and streaming requests
     # @param client [OpenAI::Client] the OpenAI client to wrap
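To make the aggregation concrete, here is a hedged sketch of what the new helper produces: two delta chunks fold into one chat-completion-shaped hash. RubyOpenAI stands in for the module these helpers live in; its full namespace is not visible in this diff, and the chunk values are illustrative:

chunks = [
  { "id" => "chatcmpl-1", "created" => 1_700_000_000, "model" => "gpt-4o-mini",
    "choices" => [{ "index" => 0, "delta" => { "role" => "assistant", "content" => "Hel" } }] },
  { "id" => "chatcmpl-1",
    "choices" => [{ "index" => 0, "delta" => { "content" => "lo" }, "finish_reason" => "stop" }] }
]

RubyOpenAI.aggregate_streaming_chunks(chunks)
# => { "id" => "chatcmpl-1", "created" => 1700000000, "model" => "gpt-4o-mini",
#      "usage" => nil,
#      "choices" => [{ "index" => 0,
#                      "message" => { "role" => "assistant", "content" => "Hello" },
#                      "finish_reason" => "stop" }] }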
@@ -35,92 +134,235 @@ module Braintrust
     def self.wrap(client, tracer_provider: nil)
       tracer_provider ||= ::OpenTelemetry.tracer_provider
 
+      # Store tracer provider on the client for use by wrapper modules
+      client.instance_variable_set(:@braintrust_tracer_provider, tracer_provider)
+
       # Wrap chat completions
-      wrap_chat(client
+      wrap_chat(client)
+
+      # Wrap responses API if available
+      wrap_responses(client) if client.respond_to?(:responses)
 
       client
     end
 
     # Wrap chat API
     # @param client [OpenAI::Client] the OpenAI client
-    [31 lines not rendered in this export]
+    def self.wrap_chat(client)
+      client.singleton_class.prepend(ChatWrapper)
+    end
+
+    # Wrap responses API
+    # @param client [OpenAI::Client] the OpenAI client
+    def self.wrap_responses(client)
+      # Store tracer provider on the responses object for use by wrapper module
+      responses_obj = client.responses
+      responses_obj.instance_variable_set(:@braintrust_tracer_provider, client.instance_variable_get(:@braintrust_tracer_provider))
+      responses_obj.singleton_class.prepend(ResponsesCreateWrapper)
+    end
+
+    # Wrapper module for chat completions
+    module ChatWrapper
+      def chat(parameters:)
+        tracer_provider = @braintrust_tracer_provider
+        tracer = tracer_provider.tracer("braintrust")
+
+        tracer.in_span("Chat Completion") do |span|
+          # Track start time for time_to_first_token
+          start_time = Time.now
+          time_to_first_token = nil
+          is_streaming = parameters.key?(:stream) && parameters[:stream].is_a?(Proc)
+
+          # Initialize metadata hash
+          metadata = {
+            "provider" => "openai",
+            "endpoint" => "/v1/chat/completions"
+          }
+
+          # Capture request metadata fields
+          metadata_fields = %w[
+            model frequency_penalty logit_bias logprobs max_tokens n
+            presence_penalty response_format seed service_tier stop
+            stream stream_options temperature top_p top_logprobs
+            tools tool_choice parallel_tool_calls user functions function_call
+          ]
+
+          metadata_fields.each do |field|
+            field_sym = field.to_sym
+            if parameters.key?(field_sym)
+              # Special handling for stream parameter (it's a Proc)
+              metadata[field] = if field == "stream"
+                true # Just mark as streaming
+              else
+                parameters[field_sym]
               end
             end
+          end
+
+          # Set input messages as JSON
+          if parameters[:messages]
+            RubyOpenAI.set_json_attr(span, "braintrust.input_json", parameters[:messages])
+          end
 
-          [3 lines not rendered in this export]
+          # Wrap streaming callback if present to capture time to first token and aggregate chunks
+          aggregated_chunks = []
+          if is_streaming
+            original_stream_proc = parameters[:stream]
+            parameters = parameters.dup
+            parameters[:stream] = proc do |chunk, bytesize|
+              # Capture time to first token on first chunk
+              time_to_first_token ||= Time.now - start_time
+              # Aggregate chunks for later processing
+              aggregated_chunks << chunk
+              # Call original callback
+              original_stream_proc.call(chunk, bytesize)
             end
+          end
+
+          begin
+            # Call the original method
+            response = super(parameters: parameters)
+
+            # Calculate time to first token for non-streaming
+            time_to_first_token ||= Time.now - start_time unless is_streaming
+
+            # Process response data
+            if is_streaming && !aggregated_chunks.empty?
+              # Aggregate streaming chunks into response-like structure
+              aggregated_response = RubyOpenAI.aggregate_streaming_chunks(aggregated_chunks)
+              RubyOpenAI.set_span_attributes(span, aggregated_response, time_to_first_token, metadata)
+            else
+              # Non-streaming: use response object directly
+              RubyOpenAI.set_span_attributes(span, response || {}, time_to_first_token, metadata)
+            end
+
+            # Set metadata ONCE at the end with complete hash
+            RubyOpenAI.set_json_attr(span, "braintrust.metadata", metadata)
 
-            [3 lines not rendered in this export]
+            response
+          rescue => e
+            # Record exception in span
+            span.record_exception(e)
+            span.status = OpenTelemetry::Trace::Status.error("Exception: #{e.class} - #{e.message}")
+            raise
+          end
+        end
+      end
+    end
+
+    # Wrapper module for responses API create method
+    module ResponsesCreateWrapper
+      def create(parameters:)
+        tracer_provider = @braintrust_tracer_provider
+        tracer = tracer_provider.tracer("braintrust")
 
-        [3 lines not rendered in this export]
+        tracer.in_span("openai.responses.create") do |span|
+          # Track start time for time_to_first_token
+          start_time = Time.now
+          time_to_first_token = nil
+          is_streaming = parameters.key?(:stream) && parameters[:stream].is_a?(Proc)
+
+          # Initialize metadata hash
+          metadata = {
+            "provider" => "openai",
+            "endpoint" => "/v1/responses"
+          }
+
+          # Capture request metadata fields
+          metadata_fields = %w[
+            model instructions modalities tools parallel_tool_calls
+            tool_choice temperature max_tokens top_p frequency_penalty
+            presence_penalty seed user store response_format
+            reasoning previous_response_id truncation
+          ]
+
+          metadata_fields.each do |field|
+            field_sym = field.to_sym
+            if parameters.key?(field_sym)
+              metadata[field] = parameters[field_sym]
+            end
+          end
+
+          # Mark as streaming if applicable
+          metadata["stream"] = true if is_streaming
+
+          # Set input as JSON
+          if parameters[:input]
+            RubyOpenAI.set_json_attr(span, "braintrust.input_json", parameters[:input])
+          end
+
+          # Wrap streaming callback if present to capture time to first token and aggregate chunks
+          aggregated_chunks = []
+          if is_streaming
+            original_stream_proc = parameters[:stream]
+            parameters = parameters.dup
+            parameters[:stream] = proc do |chunk, event|
+              # Capture time to first token on first chunk
+              time_to_first_token ||= Time.now - start_time
+              # Aggregate chunks for later processing
+              aggregated_chunks << chunk
+              # Call original callback
+              original_stream_proc.call(chunk, event)
+            end
+          end
+
+          begin
+            # Call the original method
+            response = super(parameters: parameters)
+
+            # Calculate time to first token for non-streaming
+            time_to_first_token ||= Time.now - start_time unless is_streaming
+
+            # Process response data
+            if is_streaming && !aggregated_chunks.empty?
+              # Aggregate streaming chunks into response-like structure
+              aggregated_response = RubyOpenAI.aggregate_responses_chunks(aggregated_chunks)
+
+              # Set output as JSON
+              if aggregated_response["output"]
+                RubyOpenAI.set_json_attr(span, "braintrust.output_json", aggregated_response["output"])
               end
 
-              # Set metrics (token usage)
-              [3 lines not rendered in this export]
+              # Set metrics (token usage + time_to_first_token)
+              metrics = {}
+              if aggregated_response["usage"]
+                metrics = RubyOpenAI.parse_usage_tokens(aggregated_response["usage"])
               end
+              metrics["time_to_first_token"] = time_to_first_token || 0.0
+              RubyOpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
 
-              #
-              if
-              [4 lines not rendered in this export]
+              # Update metadata with response fields
+              metadata["id"] = aggregated_response["id"] if aggregated_response["id"]
+            else
+              # Non-streaming: use response object directly
+              if response && response["output"]
+                RubyOpenAI.set_json_attr(span, "braintrust.output_json", response["output"])
               end
 
-              # Set
-              [1 line not rendered in this export]
+              # Set metrics (token usage + time_to_first_token)
+              metrics = {}
+              if response && response["usage"]
+                metrics = RubyOpenAI.parse_usage_tokens(response["usage"])
+              end
+              metrics["time_to_first_token"] = time_to_first_token || 0.0
+              RubyOpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
 
-            response
-            [1 line not rendered in this export]
-            # Record exception in span
-            span.record_exception(e)
-            span.status = OpenTelemetry::Trace::Status.error("Exception: #{e.class} - #{e.message}")
-            raise
+              # Update metadata with response fields
+              metadata["id"] = response["id"] if response && response["id"]
             end
+
+            # Set metadata ONCE at the end with complete hash
+            RubyOpenAI.set_json_attr(span, "braintrust.metadata", metadata)
+
+            response
+          rescue => e
+            # Record exception in span
+            span.record_exception(e)
+            span.status = OpenTelemetry::Trace::Status.error("Exception: #{e.class} - #{e.message}")
+            raise
           end
         end
       end
-
-    # Prepend the wrapper to the client's singleton class
-    client.singleton_class.prepend(wrapper)
     end
   end
 end
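With the wrappers now prepended as modules, enabling tracing is a single call. A minimal usage sketch for the ruby-openai gem, again using RubyOpenAI as a stand-in for the wrapper's full namespace; the model and messages are illustrative:

require "openai"

client = OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"])
RubyOpenAI.wrap(client) # defaults to OpenTelemetry.tracer_provider

# Non-streaming: the span records input, output, token metrics and metadata
client.chat(parameters: { model: "gpt-4o-mini", messages: [{ role: "user", content: "Hi" }] })

# Streaming: the :stream Proc is transparently re-wrapped, so the span still
# receives the aggregated output plus a time_to_first_token metric
client.chat(parameters: {
  model: "gpt-4o-mini",
  messages: [{ role: "user", content: "Hi" }],
  stream: proc { |chunk, _bytesize| print chunk.dig("choices", 0, "delta", "content") }
})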
(second Ruby source file; header line not captured in this export)
CHANGED

@@ -155,7 +155,10 @@ module Braintrust
       define_method(:create) do |**params|
         tracer = tracer_provider.tracer("braintrust")
 
-        tracer.in_span("
+        tracer.in_span("Chat Completion") do |span|
+          # Track start time for time_to_first_token
+          start_time = Time.now
+
           # Initialize metadata hash
           metadata = {
             "provider" => "openai",
@@ -184,6 +187,9 @@ module Braintrust
           # Call the original method
           response = super(**params)
 
+          # Calculate time to first token
+          time_to_first_token = Time.now - start_time
+
           # Set output (choices) as JSON
           # Use to_h to get the raw structure with all fields (including tool_calls)
           if response.respond_to?(:choices) && response.choices&.any?
@@ -192,10 +198,13 @@ module Braintrust
           end
 
           # Set metrics (token usage with advanced details)
+          metrics = {}
           if response.respond_to?(:usage) && response.usage
             metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(response.usage)
-            span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty?
           end
+          # Add time_to_first_token metric
+          metrics["time_to_first_token"] = time_to_first_token
+          span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty?
 
           # Add response metadata fields
           metadata["id"] = response.id if response.respond_to?(:id) && response.id
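The metrics = {} hoisting in this hunk is the functional change: previously the metrics attribute was only written inside the usage branch, so a response without usage data emitted no metrics at all, whereas now time_to_first_token is always recorded. Illustrative values for the resulting attribute:

require "json"

metrics = { "prompt_tokens" => 12, "completion_tokens" => 34, "tokens" => 46 }
metrics["time_to_first_token"] = 0.42
JSON.generate(metrics)
# => "{\"prompt_tokens\":12,\"completion_tokens\":34,\"tokens\":46,\"time_to_first_token\":0.42}"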
@@ -214,13 +223,15 @@ module Braintrust
       define_method(:stream_raw) do |**params|
         tracer = tracer_provider.tracer("braintrust")
         aggregated_chunks = []
+        start_time = Time.now
+        time_to_first_token = nil
         metadata = {
           "provider" => "openai",
           "endpoint" => "/v1/chat/completions"
         }
 
         # Start span with proper context (will be child of current span if any)
-        span = tracer.start_span("
+        span = tracer.start_span("Chat Completion")
 
         # Capture request metadata fields
         metadata_fields = %i[
@@ -259,6 +270,8 @@ module Braintrust
         original_each = stream.method(:each)
         stream.define_singleton_method(:each) do |&block|
           original_each.call do |chunk|
+            # Capture time to first token on first chunk
+            time_to_first_token ||= Time.now - start_time
             aggregated_chunks << chunk.to_h
             block&.call(chunk)
           end
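Because the timing hook lives inside the re-defined each, time_to_first_token is stamped lazily, only when the caller actually starts consuming the stream. A hedged consumption sketch, assuming the official openai gem's stream_raw surface and illustrative parameters:

stream = client.chat.completions.stream_raw(
  model: "gpt-4o-mini",
  messages: [{ role: "user", content: "Hi" }]
)

stream.each do |chunk|
  # The first chunk to arrive here sets time_to_first_token on the span
  print chunk.to_h.dig(:choices, 0, :delta, :content)
end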
@@ -275,10 +288,13 @@ module Braintrust
           Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.output_json", aggregated_output[:choices])
 
           # Set metrics if usage is included (requires stream_options.include_usage)
+          metrics = {}
           if aggregated_output[:usage]
             metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(aggregated_output[:usage])
-            Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
           end
+          # Add time_to_first_token metric
+          metrics["time_to_first_token"] = time_to_first_token || 0.0
+          Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
 
           # Update metadata with response fields
           metadata["id"] = aggregated_output[:id] if aggregated_output[:id]
@@ -297,13 +313,15 @@ module Braintrust
       # Wrap stream for streaming chat completions (returns ChatCompletionStream with convenience methods)
       define_method(:stream) do |**params|
         tracer = tracer_provider.tracer("braintrust")
+        start_time = Time.now
+        time_to_first_token = nil
         metadata = {
           "provider" => "openai",
           "endpoint" => "/v1/chat/completions"
         }
 
         # Start span with proper context (will be child of current span if any)
-        span = tracer.start_span("
+        span = tracer.start_span("Chat Completion")
 
         # Capture request metadata fields
         metadata_fields = %i[
@@ -354,10 +372,13 @@ module Braintrust
           end
 
           # Set metrics if usage is available
+          metrics = {}
           if snapshot.usage
             metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(snapshot.usage)
-            set_json_attr.call("braintrust.metrics", metrics) unless metrics.empty?
           end
+          # Add time_to_first_token metric
+          metrics["time_to_first_token"] = time_to_first_token || 0.0
+          set_json_attr.call("braintrust.metrics", metrics) unless metrics.empty?
 
           # Update metadata with response fields
           metadata["id"] = snapshot.id if snapshot.respond_to?(:id) && snapshot.id
@@ -378,7 +399,11 @@ module Braintrust
         # Wrap .each() method - this is the core consumption method
         original_each = stream.method(:each)
         stream.define_singleton_method(:each) do |&block|
-          original_each.call
+          original_each.call do |chunk|
+            # Capture time to first token on first chunk
+            time_to_first_token ||= Time.now - start_time
+            block&.call(chunk)
+          end
         rescue => e
           span.record_exception(e)
           span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}")
@@ -392,8 +417,13 @@ module Braintrust
         stream.define_singleton_method(:text) do
           text_enum = original_text.call
           # Wrap the returned enumerable's .each method
+          original_text_each = text_enum.method(:each)
           text_enum.define_singleton_method(:each) do |&block|
-            [1 line not rendered in this export]
+            original_text_each.call do |delta|
+              # Capture time to first token on first delta
+              time_to_first_token ||= Time.now - start_time
+              block&.call(delta)
+            end
           rescue => e
             span.record_exception(e)
             span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}")
(third Ruby source file; header line not captured in this export)
CHANGED

@@ -14,16 +14,24 @@ module Braintrust
       return metrics unless usage_hash.is_a?(Hash)
 
       # Field mappings: OpenAI → Braintrust
+      # Supports both Chat Completions API (prompt_tokens, completion_tokens)
+      # and Responses API (input_tokens, output_tokens)
       field_map = {
         "prompt_tokens" => "prompt_tokens",
         "completion_tokens" => "completion_tokens",
-        "total_tokens" => "tokens"
+        "total_tokens" => "tokens",
+        # Responses API uses different field names
+        "input_tokens" => "prompt_tokens",
+        "output_tokens" => "completion_tokens"
       }
 
       # Prefix mappings for *_tokens_details
       prefix_map = {
         "prompt" => "prompt",
-        "completion" => "completion"
+        "completion" => "completion",
+        # Responses API uses input/output prefixes
+        "input" => "prompt",
+        "output" => "completion"
       }
 
       usage_hash.each do |key, value|
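With the extended maps, both usage shapes normalize to the same Braintrust metric names. Illustrative calls; the receiver is whichever module defines parse_usage_tokens:

# Chat Completions API shape
parse_usage_tokens({ "prompt_tokens" => 10, "completion_tokens" => 5, "total_tokens" => 15 })
# => { "prompt_tokens" => 10, "completion_tokens" => 5, "tokens" => 15 }

# Responses API shape maps onto the same names
parse_usage_tokens({ "input_tokens" => 10, "output_tokens" => 5, "total_tokens" => 15 })
# => { "prompt_tokens" => 10, "completion_tokens" => 5, "tokens" => 15 }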
data/lib/braintrust/version.rb
CHANGED