braintrust 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 866cb2e797502f00cda1625ad90f4d734b4b83f0d21d8243675a933fae9df693
-  data.tar.gz: f74151b0e18b12cf19b61b1b75b2f58e784d4171f21c0996526d29c719174260
+  metadata.gz: c6b2bcda06084f2e90d2602659ca71cf0ab574ac8c74c367890cbb2b04740529
+  data.tar.gz: 306b5a46660eae3d3e3811d021627883419a4dc4c114e51e40be64c590868c95
 SHA512:
-  metadata.gz: ad2f68a6de8d547b6a609c3393522c4ae3dfcb441a9fc841484bbbcb21de7648da7a00cd625612d98c6b99e4ad41186a2bc3fff706e17b9797e7ac514e685923
-  data.tar.gz: f0613e5fa08c07333c74467ec7830a40f72905475e35becf7a2add077168c7554046aa9a3824fe24006870338163526e8d170cfd25727af5d53416283ae03714
+  metadata.gz: 1db7bf706b260762aa114eb5e8f844cb0567efd5a6f9d8cca03667111c0e89ff68f4e53b3a3adc6ad2192947602fc4b88a8e0057169ad7eff12ccb1c2ecb4951
+  data.tar.gz: bbc71c33bb28da124bd1cc61c8bf4f765ec2899a57bf604da624a04c42a7bfce508ed1eb78c4ec421da5038fbaf89086cee8d0b04998d223568df05e8640679f
@@ -28,6 +28,105 @@ module Braintrust
       Braintrust::Trace.parse_openai_usage_tokens(usage)
     end
 
+    # Aggregate streaming chunks into a single response structure
+    # @param chunks [Array<Hash>] array of chunk hashes from stream
+    # @return [Hash] aggregated response with choices, usage, id, created, model
+    def self.aggregate_streaming_chunks(chunks)
+      return {} if chunks.empty?
+
+      # Initialize aggregated structure
+      aggregated = {
+        "id" => nil,
+        "created" => nil,
+        "model" => nil,
+        "usage" => nil,
+        "choices" => []
+      }
+
+      # Track aggregated content for the first choice
+      role = nil
+      content = +""
+
+      chunks.each do |chunk|
+        # Capture top-level fields from any chunk that has them
+        aggregated["id"] ||= chunk["id"]
+        aggregated["created"] ||= chunk["created"]
+        aggregated["model"] ||= chunk["model"]
+
+        # Aggregate usage (usually only in last chunk if stream_options.include_usage is set)
+        aggregated["usage"] = chunk["usage"] if chunk["usage"]
+
+        # Aggregate content from first choice
+        if chunk.dig("choices", 0, "delta", "role")
+          role ||= chunk.dig("choices", 0, "delta", "role")
+        end
+        if chunk.dig("choices", 0, "delta", "content")
+          content << chunk.dig("choices", 0, "delta", "content")
+        end
+      end
+
+      # Build aggregated choices array
+      aggregated["choices"] = [
+        {
+          "index" => 0,
+          "message" => {
+            "role" => role || "assistant",
+            "content" => content
+          },
+          "finish_reason" => chunks.dig(-1, "choices", 0, "finish_reason")
+        }
+      ]
+
+      aggregated
+    end
+
+    # Aggregate responses streaming chunks into a single response structure
+    # @param chunks [Array<Hash>] array of chunk hashes from stream
+    # @return [Hash] aggregated response with output, usage, id
+    def self.aggregate_responses_chunks(chunks)
+      return {} if chunks.empty?
+
+      # Find the response.completed event which has the final response
+      completed_chunk = chunks.find { |c| c["type"] == "response.completed" }
+
+      if completed_chunk && completed_chunk["response"]
+        response = completed_chunk["response"]
+        return {
+          "id" => response["id"],
+          "output" => response["output"],
+          "usage" => response["usage"]
+        }
+      end
+
+      # Fallback if no completed event found
+      {}
+    end
+
+    # Set span attributes from response data (works for both streaming and non-streaming)
+    # @param span [OpenTelemetry::Trace::Span] the span to set attributes on
+    # @param response_data [Hash] response hash with keys: choices, usage, id, created, model, system_fingerprint, service_tier
+    # @param time_to_first_token [Float] time to first token in seconds
+    # @param metadata [Hash] metadata hash to update with response fields
+    def self.set_span_attributes(span, response_data, time_to_first_token, metadata)
+      # Set output (choices) as JSON
+      if response_data["choices"]&.any?
+        set_json_attr(span, "braintrust.output_json", response_data["choices"])
+      end
+
+      # Set metrics (token usage + time_to_first_token)
+      metrics = {}
+      if response_data["usage"]
+        metrics = parse_usage_tokens(response_data["usage"])
+      end
+      metrics["time_to_first_token"] = time_to_first_token || 0.0
+      set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
+
+      # Update metadata with response fields
+      %w[id created model system_fingerprint service_tier].each do |field|
+        metadata[field] = response_data[field] if response_data[field]
+      end
+    end
+
     # Wrap an OpenAI::Client (ruby-openai gem) to automatically create spans
     # Supports both synchronous and streaming requests
     # @param client [OpenAI::Client] the OpenAI client to wrap
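
To illustrate the aggregation above: a minimal sketch, assuming chunk hashes shaped like OpenAI's Chat Completions streaming deltas, and using the module path that appears elsewhere in this diff.

    require "braintrust"

    chunks = [
      {"id" => "chatcmpl-1", "model" => "gpt-4o-mini",
       "choices" => [{"delta" => {"role" => "assistant"}}]},
      {"choices" => [{"delta" => {"content" => "Hello"}}]},
      {"choices" => [{"delta" => {"content" => ", world"}, "finish_reason" => "stop"}],
       "usage" => {"prompt_tokens" => 5, "completion_tokens" => 2, "total_tokens" => 7}}
    ]

    agg = Braintrust::Trace::Contrib::Github::Alexrudall::RubyOpenAI.aggregate_streaming_chunks(chunks)
    agg["choices"][0]["message"]["content"]  # => "Hello, world"
    agg["usage"]["total_tokens"]             # => 7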
@@ -35,92 +134,235 @@ module Braintrust
     def self.wrap(client, tracer_provider: nil)
       tracer_provider ||= ::OpenTelemetry.tracer_provider
 
+      # Store tracer provider on the client for use by wrapper modules
+      client.instance_variable_set(:@braintrust_tracer_provider, tracer_provider)
+
       # Wrap chat completions
-      wrap_chat(client, tracer_provider)
+      wrap_chat(client)
+
+      # Wrap responses API if available
+      wrap_responses(client) if client.respond_to?(:responses)
 
       client
     end
 
     # Wrap chat API
     # @param client [OpenAI::Client] the OpenAI client
-    # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider
-    def self.wrap_chat(client, tracer_provider)
-      # Create a wrapper module that intercepts the chat method
-      wrapper = Module.new do
-        define_method(:chat) do |parameters:|
-          tracer = tracer_provider.tracer("braintrust")
-
-          tracer.in_span("openai.chat.completions.create") do |span|
-            # Initialize metadata hash
-            metadata = {
-              "provider" => "openai",
-              "endpoint" => "/v1/chat/completions"
-            }
-
-            # Capture request metadata fields
-            metadata_fields = %w[
-              model frequency_penalty logit_bias logprobs max_tokens n
-              presence_penalty response_format seed service_tier stop
-              stream stream_options temperature top_p top_logprobs
-              tools tool_choice parallel_tool_calls user functions function_call
-            ]
-
-            metadata_fields.each do |field|
-              field_sym = field.to_sym
-              if parameters.key?(field_sym)
-                # Special handling for stream parameter (it's a Proc)
-                metadata[field] = if field == "stream"
-                  true # Just mark as streaming
-                else
-                  parameters[field_sym]
+    def self.wrap_chat(client)
+      client.singleton_class.prepend(ChatWrapper)
+    end
+
+    # Wrap responses API
+    # @param client [OpenAI::Client] the OpenAI client
+    def self.wrap_responses(client)
+      # Store tracer provider on the responses object for use by wrapper module
+      responses_obj = client.responses
+      responses_obj.instance_variable_set(:@braintrust_tracer_provider, client.instance_variable_get(:@braintrust_tracer_provider))
+      responses_obj.singleton_class.prepend(ResponsesCreateWrapper)
+    end
+
+    # Wrapper module for chat completions
+    module ChatWrapper
+      def chat(parameters:)
+        tracer_provider = @braintrust_tracer_provider
+        tracer = tracer_provider.tracer("braintrust")
+
+        tracer.in_span("Chat Completion") do |span|
+          # Track start time for time_to_first_token
+          start_time = Time.now
+          time_to_first_token = nil
+          is_streaming = parameters.key?(:stream) && parameters[:stream].is_a?(Proc)
+
+          # Initialize metadata hash
+          metadata = {
+            "provider" => "openai",
+            "endpoint" => "/v1/chat/completions"
+          }
+
+          # Capture request metadata fields
+          metadata_fields = %w[
+            model frequency_penalty logit_bias logprobs max_tokens n
+            presence_penalty response_format seed service_tier stop
+            stream stream_options temperature top_p top_logprobs
+            tools tool_choice parallel_tool_calls user functions function_call
+          ]
+
+          metadata_fields.each do |field|
+            field_sym = field.to_sym
+            if parameters.key?(field_sym)
+              # Special handling for stream parameter (it's a Proc)
+              metadata[field] = if field == "stream"
+                true # Just mark as streaming
+              else
+                parameters[field_sym]
               end
             end
+          end
+
+          # Set input messages as JSON
+          if parameters[:messages]
+            RubyOpenAI.set_json_attr(span, "braintrust.input_json", parameters[:messages])
+          end
 
-            # Set input messages as JSON
-            if parameters[:messages]
-              span.set_attribute("braintrust.input_json", JSON.generate(parameters[:messages]))
+          # Wrap streaming callback if present to capture time to first token and aggregate chunks
+          aggregated_chunks = []
+          if is_streaming
+            original_stream_proc = parameters[:stream]
+            parameters = parameters.dup
+            parameters[:stream] = proc do |chunk, bytesize|
+              # Capture time to first token on first chunk
+              time_to_first_token ||= Time.now - start_time
+              # Aggregate chunks for later processing
+              aggregated_chunks << chunk
+              # Call original callback
+              original_stream_proc.call(chunk, bytesize)
             end
+          end
+
+          begin
+            # Call the original method
+            response = super(parameters: parameters)
+
+            # Calculate time to first token for non-streaming
+            time_to_first_token ||= Time.now - start_time unless is_streaming
+
+            # Process response data
+            if is_streaming && !aggregated_chunks.empty?
+              # Aggregate streaming chunks into response-like structure
+              aggregated_response = RubyOpenAI.aggregate_streaming_chunks(aggregated_chunks)
+              RubyOpenAI.set_span_attributes(span, aggregated_response, time_to_first_token, metadata)
+            else
+              # Non-streaming: use response object directly
+              RubyOpenAI.set_span_attributes(span, response || {}, time_to_first_token, metadata)
+            end
+
+            # Set metadata ONCE at the end with complete hash
+            RubyOpenAI.set_json_attr(span, "braintrust.metadata", metadata)
 
-            begin
-              # Call the original method
-              response = super(parameters: parameters)
+            response
+          rescue => e
+            # Record exception in span
+            span.record_exception(e)
+            span.status = OpenTelemetry::Trace::Status.error("Exception: #{e.class} - #{e.message}")
+            raise
+          end
+        end
+      end
+    end
+
+    # Wrapper module for responses API create method
+    module ResponsesCreateWrapper
+      def create(parameters:)
+        tracer_provider = @braintrust_tracer_provider
+        tracer = tracer_provider.tracer("braintrust")
 
-              # Set output (choices) as JSON
-              if response && response["choices"]&.any?
-                span.set_attribute("braintrust.output_json", JSON.generate(response["choices"]))
+        tracer.in_span("openai.responses.create") do |span|
+          # Track start time for time_to_first_token
+          start_time = Time.now
+          time_to_first_token = nil
+          is_streaming = parameters.key?(:stream) && parameters[:stream].is_a?(Proc)
+
+          # Initialize metadata hash
+          metadata = {
+            "provider" => "openai",
+            "endpoint" => "/v1/responses"
+          }
+
+          # Capture request metadata fields
+          metadata_fields = %w[
+            model instructions modalities tools parallel_tool_calls
+            tool_choice temperature max_tokens top_p frequency_penalty
+            presence_penalty seed user store response_format
+            reasoning previous_response_id truncation
+          ]
+
+          metadata_fields.each do |field|
+            field_sym = field.to_sym
+            if parameters.key?(field_sym)
+              metadata[field] = parameters[field_sym]
+            end
+          end
+
+          # Mark as streaming if applicable
+          metadata["stream"] = true if is_streaming
+
+          # Set input as JSON
+          if parameters[:input]
+            RubyOpenAI.set_json_attr(span, "braintrust.input_json", parameters[:input])
+          end
+
+          # Wrap streaming callback if present to capture time to first token and aggregate chunks
+          aggregated_chunks = []
+          if is_streaming
+            original_stream_proc = parameters[:stream]
+            parameters = parameters.dup
+            parameters[:stream] = proc do |chunk, event|
+              # Capture time to first token on first chunk
+              time_to_first_token ||= Time.now - start_time
+              # Aggregate chunks for later processing
+              aggregated_chunks << chunk
+              # Call original callback
+              original_stream_proc.call(chunk, event)
+            end
+          end
+
+          begin
+            # Call the original method
+            response = super(parameters: parameters)
+
+            # Calculate time to first token for non-streaming
+            time_to_first_token ||= Time.now - start_time unless is_streaming
+
+            # Process response data
+            if is_streaming && !aggregated_chunks.empty?
+              # Aggregate streaming chunks into response-like structure
+              aggregated_response = RubyOpenAI.aggregate_responses_chunks(aggregated_chunks)
+
+              # Set output as JSON
+              if aggregated_response["output"]
+                RubyOpenAI.set_json_attr(span, "braintrust.output_json", aggregated_response["output"])
               end
 
-              # Set metrics (token usage)
-              if response && response["usage"]
-                metrics = Braintrust::Trace::Contrib::Github::Alexrudall::RubyOpenAI.parse_usage_tokens(response["usage"])
-                span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty?
+              # Set metrics (token usage + time_to_first_token)
+              metrics = {}
+              if aggregated_response["usage"]
+                metrics = RubyOpenAI.parse_usage_tokens(aggregated_response["usage"])
               end
+              metrics["time_to_first_token"] = time_to_first_token || 0.0
+              RubyOpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
 
-              # Add response metadata fields
-              if response
-                metadata["id"] = response["id"] if response["id"]
-                metadata["created"] = response["created"] if response["created"]
-                metadata["system_fingerprint"] = response["system_fingerprint"] if response["system_fingerprint"]
-                metadata["service_tier"] = response["service_tier"] if response["service_tier"]
+              # Update metadata with response fields
+              metadata["id"] = aggregated_response["id"] if aggregated_response["id"]
+            else
+              # Non-streaming: use response object directly
+              if response && response["output"]
+                RubyOpenAI.set_json_attr(span, "braintrust.output_json", response["output"])
               end
 
-              # Set metadata ONCE at the end with complete hash
-              span.set_attribute("braintrust.metadata", JSON.generate(metadata))
+              # Set metrics (token usage + time_to_first_token)
+              metrics = {}
+              if response && response["usage"]
+                metrics = RubyOpenAI.parse_usage_tokens(response["usage"])
+              end
+              metrics["time_to_first_token"] = time_to_first_token || 0.0
+              RubyOpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
 
-              response
-            rescue => e
-              # Record exception in span
-              span.record_exception(e)
-              span.status = OpenTelemetry::Trace::Status.error("Exception: #{e.class} - #{e.message}")
-              raise
+              # Update metadata with response fields
+              metadata["id"] = response["id"] if response && response["id"]
             end
+
+            # Set metadata ONCE at the end with complete hash
+            RubyOpenAI.set_json_attr(span, "braintrust.metadata", metadata)
+
+            response
+          rescue => e
+            # Record exception in span
+            span.record_exception(e)
+            span.status = OpenTelemetry::Trace::Status.error("Exception: #{e.class} - #{e.message}")
+            raise
           end
         end
       end
     end
-
-    # Prepend the wrapper to the client's singleton class
-    client.singleton_class.prepend(wrapper)
   end
 end
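
A usage sketch for the wrapper above, assuming the alexrudall/ruby-openai gem, whose streaming callback receives (chunk, bytesize) as shown in the diff:

    require "openai"      # alexrudall/ruby-openai
    require "braintrust"

    client = OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"])
    Braintrust::Trace::Contrib::Github::Alexrudall::RubyOpenAI.wrap(client)

    # Streaming: the wrapper records time_to_first_token on the first chunk,
    # aggregates all chunks for the span, and still invokes this callback unchanged.
    client.chat(parameters: {
      model: "gpt-4o-mini",
      messages: [{role: "user", content: "Say hello"}],
      stream: proc { |chunk, _bytesize| print chunk.dig("choices", 0, "delta", "content") }
    })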
@@ -71,15 +71,59 @@ module Braintrust
       # Check if already wrapped to make this idempotent
       return chat if chat.instance_variable_get(:@braintrust_wrapped)
 
-      # Create a wrapper module that intercepts chat.ask
+      # Create a wrapper module that intercepts chat.complete
       wrapper = create_wrapper_module(tracer_provider)
 
       # Mark as wrapped and prepend the wrapper to the chat instance
       chat.instance_variable_set(:@braintrust_wrapped, true)
       chat.singleton_class.prepend(wrapper)
+
+      # Register tool callbacks for tool span creation
+      register_tool_callbacks(chat, tracer_provider)
+
       chat
     end
 
+    # Register callbacks for tool execution tracing
+    # @param chat [RubyLLM::Chat] the chat instance
+    # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider
+    def self.register_tool_callbacks(chat, tracer_provider)
+      tracer = tracer_provider.tracer("braintrust")
+
+      # Track tool spans by tool_call_id
+      tool_spans = {}
+
+      # Start tool span when tool is called
+      chat.on_tool_call do |tool_call|
+        span = tracer.start_span("ruby_llm.tool.#{tool_call.name}")
+        set_json_attr(span, "braintrust.span_attributes", {type: "tool"})
+        span.set_attribute("tool.name", tool_call.name)
+        span.set_attribute("tool.call_id", tool_call.id)
+
+        # Store tool input
+        input = {
+          "name" => tool_call.name,
+          "arguments" => tool_call.arguments
+        }
+        set_json_attr(span, "braintrust.input_json", input)
+
+        tool_spans[tool_call.id] = span
+      end
+
+      # End tool span when result is received
+      chat.on_tool_result do |result|
+        # Find the most recent tool span (RubyLLM doesn't pass tool_call_id to on_tool_result)
+        # The spans are processed in order, so we can use the first unfinished one
+        tool_call_id, span = tool_spans.find { |_id, s| s }
+        if span
+          # Store tool output
+          set_json_attr(span, "braintrust.output_json", result)
+          span.finish
+          tool_spans.delete(tool_call_id)
+        end
+      end
+    end
+
     # Unwrap RubyLLM to remove Braintrust tracing
     # For class-level unwrapping, removes the initialize override from the wrapper module
     # For instance-level unwrapping, clears the wrapped flag
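
A hedged sketch of the callbacks above in use, assuming RubyLLM's documented chat and tool API and that this module's wrap entry point takes the chat instance; WeatherTool is a hypothetical RubyLLM::Tool subclass:

    require "ruby_llm"
    require "braintrust"

    chat = RubyLLM.chat(model: "gpt-4o-mini").with_tool(WeatherTool)
    Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat)

    # Each tool execution opens a span named "ruby_llm.tool.<name>" with the
    # call's arguments as input and the tool's return value as output.
    chat.ask("What's the weather in Paris?")

Note that because on_tool_result carries no tool_call_id, span pairing assumes results arrive in call order; parallel tool calls could mismatch.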
@@ -116,50 +160,75 @@ module Braintrust
       ::RubyLLM::Chat.prepend(wrapper)
     end
 
-    # Create the wrapper module that intercepts chat.ask
+    # Create the wrapper module that intercepts chat.complete
+    # We wrap complete() instead of ask() because:
+    # - ask() internally calls complete() for the actual API call
+    # - ActiveRecord integration (acts_as_chat) calls complete() directly
+    # - This ensures all LLM calls are traced regardless of entry point
+    #
+    # Important: RubyLLM's complete() calls itself recursively for tool execution.
+    # We only create a span for the outermost call to avoid duplicate spans.
+    # Tool execution is traced separately via on_tool_call/on_tool_result callbacks.
+    #
     # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider
     # @return [Module] the wrapper module
     def self.create_wrapper_module(tracer_provider)
       Module.new do
-        define_method(:ask) do |prompt = nil, **params, &block|
+        define_method(:complete) do |&block|
+          # Check if we're already inside a traced complete() call
+          # If so, just call super without creating a new span
+          if @braintrust_in_complete
+            if block
+              return super(&block)
+            else
+              return super()
+            end
+          end
+
           tracer = tracer_provider.tracer("braintrust")
 
-          if block
-            # Handle streaming request
-            wrapped_block = proc do |chunk|
-              block.call(chunk)
-            end
-            Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.handle_streaming_ask(self, tracer, prompt, params, block) do |aggregated_chunks|
-              super(prompt, **params) do |chunk|
-                aggregated_chunks << chunk
-                wrapped_block.call(chunk)
+          # Mark that we're inside a complete() call
+          @braintrust_in_complete = true
+
+          begin
+            if block
+              # Handle streaming request
+              wrapped_block = proc do |chunk|
+                block.call(chunk)
+              end
+              Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.handle_streaming_complete(self, tracer, block) do |aggregated_chunks|
+                super(&proc do |chunk|
+                  aggregated_chunks << chunk
+                  wrapped_block.call(chunk)
+                end)
+              end
+            else
+              # Handle non-streaming request
+              Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.handle_non_streaming_complete(self, tracer) do
+                super()
               end
             end
-          else
-            # Handle non-streaming request
-            Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.handle_non_streaming_ask(self, tracer, prompt, params) do
-              super(prompt, **params)
-            end
+          ensure
+            @braintrust_in_complete = false
           end
         end
       end
     end
 
-    # Handle streaming chat request with tracing
+    # Handle streaming complete request with tracing
     # @param chat [RubyLLM::Chat] the chat instance
     # @param tracer [OpenTelemetry::Trace::Tracer] the tracer
-    # @param prompt [String, nil] the user prompt
-    # @param params [Hash] additional parameters
     # @param block [Proc] the streaming block
-    def self.handle_streaming_ask(chat, tracer, prompt, params, block)
+    def self.handle_streaming_complete(chat, tracer, block)
       # Start span immediately for accurate timing
-      span = tracer.start_span("ruby_llm.chat.ask")
+      span = tracer.start_span("ruby_llm.chat")
 
       aggregated_chunks = []
 
       # Extract metadata and build input messages
+      # For complete(), messages are already in chat history (no prompt param)
       metadata = extract_metadata(chat, stream: true)
-      input_messages = build_input_messages(chat, prompt)
+      input_messages = build_input_messages(chat, nil)
 
       # Set input and metadata
       set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any?
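
One consequence of wrapping complete() rather than ask(): Rails chats persisted through RubyLLM's acts_as_chat integration are traced too, since they call complete() directly. A sketch, assuming the model class name from RubyLLM's Rails documentation:

    class Chat < ApplicationRecord
      acts_as_chat
    end

    chat = Chat.create!(model_id: "gpt-4o-mini")
    chat.ask("Hello")  # persists the message, then calls complete(), traced once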
@@ -181,19 +250,18 @@ module Braintrust
       result
     end
 
-    # Handle non-streaming chat request with tracing
+    # Handle non-streaming complete request with tracing
     # @param chat [RubyLLM::Chat] the chat instance
     # @param tracer [OpenTelemetry::Trace::Tracer] the tracer
-    # @param prompt [String, nil] the user prompt
-    # @param params [Hash] additional parameters
-    def self.handle_non_streaming_ask(chat, tracer, prompt, params)
+    def self.handle_non_streaming_complete(chat, tracer)
       # Start span immediately for accurate timing
-      span = tracer.start_span("ruby_llm.chat.ask")
+      span = tracer.start_span("ruby_llm.chat")
 
       begin
         # Extract metadata and build input messages
+        # For complete(), messages are already in chat history (no prompt param)
         metadata = extract_metadata(chat)
-        input_messages = build_input_messages(chat, prompt)
+        input_messages = build_input_messages(chat, nil)
         set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any?
 
         # Remember message count before the call (for tool call detection)
@@ -321,23 +389,62 @@ module Braintrust
     end
 
     # Build input messages array from chat history and prompt
+    # Formats messages to match OpenAI's message format
     # @param chat [RubyLLM::Chat] the chat instance
     # @param prompt [String, nil] the user prompt
     # @return [Array<Hash>] array of message hashes
     def self.build_input_messages(chat, prompt)
       input_messages = []
 
-      # Add conversation history
+      # Add conversation history, formatting each message to OpenAI format
       if chat.respond_to?(:messages) && chat.messages&.any?
-        input_messages = chat.messages.map { |m| m.respond_to?(:to_h) ? m.to_h : m }
+        input_messages = chat.messages.map { |m| format_message_for_input(m) }
       end
 
       # Add current prompt
-      input_messages << {role: "user", content: prompt} if prompt
+      input_messages << {"role" => "user", "content" => prompt} if prompt
 
       input_messages
     end
 
+    # Format a RubyLLM message to OpenAI-compatible format
+    # @param msg [Object] the RubyLLM message
+    # @return [Hash] OpenAI-formatted message
+    def self.format_message_for_input(msg)
+      formatted = {
+        "role" => msg.role.to_s
+      }
+
+      # Handle content
+      if msg.respond_to?(:content) && msg.content
+        # Convert Ruby hash notation to JSON string for tool results
+        content = msg.content
+        if msg.role.to_s == "tool" && content.is_a?(String) && content.start_with?("{:")
+          # Ruby hash string like "{:location=>...}" - try to parse and re-serialize as JSON
+          begin
+            # Simple conversion: replace Ruby hash syntax with JSON
+            content = content.gsub(/(?<=\{|, ):(\w+)=>/, '"\1":').gsub("=>", ":")
+          rescue
+            # Keep original if conversion fails
+          end
+        end
+        formatted["content"] = content
+      end
+
+      # Handle tool_calls for assistant messages
+      if msg.respond_to?(:tool_calls) && msg.tool_calls&.any?
+        formatted["tool_calls"] = format_tool_calls(msg.tool_calls)
+        formatted["content"] = nil
+      end
+
+      # Handle tool_call_id for tool result messages
+      if msg.respond_to?(:tool_call_id) && msg.tool_call_id
+        formatted["tool_call_id"] = msg.tool_call_id
+      end
+
+      formatted
+    end
+
     # Capture streaming output and metrics
     # @param span [OpenTelemetry::Trace::Span] the span
     # @param aggregated_chunks [Array] the aggregated chunks
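
The tool-content branch above rewrites Ruby inspect-style hash strings with a regex rather than a real parser; a sketch of what the substitution does:

    content = '{:location=>"Paris", :temp=>21}'
    content.gsub(/(?<=\{|, ):(\w+)=>/, '"\1":').gsub("=>", ":")
    # => '{"location":"Paris", "temp":21}'

It covers flat symbol-keyed hashes; nested structures or string keys would pass through only partially converted, which is why the original content is kept on failure.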
@@ -383,8 +490,9 @@ module Braintrust
       end
 
       # Check if there are tool calls in the messages history
+      # Look at messages added during this complete() call
       if chat.respond_to?(:messages) && chat.messages
-        assistant_msg = chat.messages[(messages_before_count + 1)..].find { |m|
+        assistant_msg = chat.messages[messages_before_count..].find { |m|
           m.role.to_s == "assistant" && m.respond_to?(:tool_calls) && m.tool_calls&.any?
         }
 
@@ -155,7 +155,10 @@ module Braintrust
       define_method(:create) do |**params|
         tracer = tracer_provider.tracer("braintrust")
 
-        tracer.in_span("openai.chat.completions.create") do |span|
+        tracer.in_span("Chat Completion") do |span|
+          # Track start time for time_to_first_token
+          start_time = Time.now
+
           # Initialize metadata hash
           metadata = {
             "provider" => "openai",
@@ -184,6 +187,9 @@ module Braintrust
           # Call the original method
           response = super(**params)
 
+          # Calculate time to first token
+          time_to_first_token = Time.now - start_time
+
           # Set output (choices) as JSON
           # Use to_h to get the raw structure with all fields (including tool_calls)
           if response.respond_to?(:choices) && response.choices&.any?
@@ -192,10 +198,13 @@ module Braintrust
           end
 
           # Set metrics (token usage with advanced details)
+          metrics = {}
           if response.respond_to?(:usage) && response.usage
             metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(response.usage)
-            span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty?
           end
+          # Add time_to_first_token metric
+          metrics["time_to_first_token"] = time_to_first_token
+          span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty?
 
           # Add response metadata fields
           metadata["id"] = response.id if response.respond_to?(:id) && response.id
@@ -214,13 +223,15 @@ module Braintrust
       define_method(:stream_raw) do |**params|
         tracer = tracer_provider.tracer("braintrust")
         aggregated_chunks = []
+        start_time = Time.now
+        time_to_first_token = nil
         metadata = {
           "provider" => "openai",
           "endpoint" => "/v1/chat/completions"
         }
 
         # Start span with proper context (will be child of current span if any)
-        span = tracer.start_span("openai.chat.completions.create")
+        span = tracer.start_span("Chat Completion")
 
         # Capture request metadata fields
         metadata_fields = %i[
@@ -259,6 +270,8 @@ module Braintrust
         original_each = stream.method(:each)
         stream.define_singleton_method(:each) do |&block|
           original_each.call do |chunk|
+            # Capture time to first token on first chunk
+            time_to_first_token ||= Time.now - start_time
             aggregated_chunks << chunk.to_h
             block&.call(chunk)
           end
@@ -275,10 +288,13 @@ module Braintrust
           Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.output_json", aggregated_output[:choices])
 
           # Set metrics if usage is included (requires stream_options.include_usage)
+          metrics = {}
           if aggregated_output[:usage]
             metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(aggregated_output[:usage])
-            Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
           end
+          # Add time_to_first_token metric
+          metrics["time_to_first_token"] = time_to_first_token || 0.0
+          Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
 
           # Update metadata with response fields
           metadata["id"] = aggregated_output[:id] if aggregated_output[:id]
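
Note the prerequisite the comment calls out: raw streams only carry usage when requested. A hedged sketch of a caller that opts in, assuming the official openai gem whose stream_raw method is wrapped above (the exact parameter shape follows OpenAI's API):

    stream = client.chat.completions.stream_raw(
      model: "gpt-4o-mini",
      messages: [{role: "user", content: "Hi"}],
      stream_options: {include_usage: true}  # final chunk then includes usage
    )
    stream.each { |chunk| print(chunk.to_h.dig(:choices, 0, :delta, :content) || "") }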
@@ -297,13 +313,15 @@ module Braintrust
       # Wrap stream for streaming chat completions (returns ChatCompletionStream with convenience methods)
       define_method(:stream) do |**params|
         tracer = tracer_provider.tracer("braintrust")
+        start_time = Time.now
+        time_to_first_token = nil
         metadata = {
           "provider" => "openai",
           "endpoint" => "/v1/chat/completions"
         }
 
         # Start span with proper context (will be child of current span if any)
-        span = tracer.start_span("openai.chat.completions.create")
+        span = tracer.start_span("Chat Completion")
 
         # Capture request metadata fields
         metadata_fields = %i[
@@ -354,10 +372,13 @@ module Braintrust
           end
 
           # Set metrics if usage is available
+          metrics = {}
           if snapshot.usage
             metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(snapshot.usage)
-            set_json_attr.call("braintrust.metrics", metrics) unless metrics.empty?
           end
+          # Add time_to_first_token metric
+          metrics["time_to_first_token"] = time_to_first_token || 0.0
+          set_json_attr.call("braintrust.metrics", metrics) unless metrics.empty?
 
           # Update metadata with response fields
           metadata["id"] = snapshot.id if snapshot.respond_to?(:id) && snapshot.id
@@ -378,7 +399,11 @@ module Braintrust
         # Wrap .each() method - this is the core consumption method
         original_each = stream.method(:each)
         stream.define_singleton_method(:each) do |&block|
-          original_each.call(&block)
+          original_each.call do |chunk|
+            # Capture time to first token on first chunk
+            time_to_first_token ||= Time.now - start_time
+            block&.call(chunk)
+          end
         rescue => e
           span.record_exception(e)
           span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}")
@@ -392,8 +417,13 @@ module Braintrust
         stream.define_singleton_method(:text) do
           text_enum = original_text.call
           # Wrap the returned enumerable's .each method
+          original_text_each = text_enum.method(:each)
           text_enum.define_singleton_method(:each) do |&block|
-            super(&block)
+            original_text_each.call do |delta|
+              # Capture time to first token on first delta
+              time_to_first_token ||= Time.now - start_time
+              block&.call(delta)
+            end
           rescue => e
             span.record_exception(e)
             span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}")
@@ -14,16 +14,24 @@ module Braintrust
       return metrics unless usage_hash.is_a?(Hash)
 
       # Field mappings: OpenAI → Braintrust
+      # Supports both Chat Completions API (prompt_tokens, completion_tokens)
+      # and Responses API (input_tokens, output_tokens)
       field_map = {
         "prompt_tokens" => "prompt_tokens",
         "completion_tokens" => "completion_tokens",
-        "total_tokens" => "tokens"
+        "total_tokens" => "tokens",
+        # Responses API uses different field names
+        "input_tokens" => "prompt_tokens",
+        "output_tokens" => "completion_tokens"
       }
 
       # Prefix mappings for *_tokens_details
       prefix_map = {
         "prompt" => "prompt",
-        "completion" => "completion"
+        "completion" => "completion",
+        # Responses API uses input/output prefixes
+        "input" => "prompt",
+        "output" => "completion"
       }
 
       usage_hash.each do |key, value|
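
With the mappings above, both usage shapes normalize to the same Braintrust metric names. A sketch, assuming the parse entry point seen in the calls elsewhere in this diff:

    require "braintrust"

    chat_usage      = {"prompt_tokens" => 12, "completion_tokens" => 34, "total_tokens" => 46}
    responses_usage = {"input_tokens" => 12, "output_tokens" => 34, "total_tokens" => 46}

    Braintrust::Trace::OpenAI.parse_usage_tokens(chat_usage)
    # => {"prompt_tokens"=>12, "completion_tokens"=>34, "tokens"=>46}
    Braintrust::Trace::OpenAI.parse_usage_tokens(responses_usage)
    # same result, via the input_/output_ mappings added above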
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Braintrust
-  VERSION = "0.0.6"
+  VERSION = "0.0.8"
 end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: braintrust
 version: !ruby/object:Gem::Version
-  version: 0.0.6
+  version: 0.0.8
 platform: ruby
 authors:
 - Braintrust