braintrust 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 866cb2e797502f00cda1625ad90f4d734b4b83f0d21d8243675a933fae9df693
-  data.tar.gz: f74151b0e18b12cf19b61b1b75b2f58e784d4171f21c0996526d29c719174260
+  metadata.gz: c6b2bcda06084f2e90d2602659ca71cf0ab574ac8c74c367890cbb2b04740529
+  data.tar.gz: 306b5a46660eae3d3e3811d021627883419a4dc4c114e51e40be64c590868c95
 SHA512:
-  metadata.gz: ad2f68a6de8d547b6a609c3393522c4ae3dfcb441a9fc841484bbbcb21de7648da7a00cd625612d98c6b99e4ad41186a2bc3fff706e17b9797e7ac514e685923
-  data.tar.gz: f0613e5fa08c07333c74467ec7830a40f72905475e35becf7a2add077168c7554046aa9a3824fe24006870338163526e8d170cfd25727af5d53416283ae03714
+  metadata.gz: 1db7bf706b260762aa114eb5e8f844cb0567efd5a6f9d8cca03667111c0e89ff68f4e53b3a3adc6ad2192947602fc4b88a8e0057169ad7eff12ccb1c2ecb4951
+  data.tar.gz: bbc71c33bb28da124bd1cc61c8bf4f765ec2899a57bf604da624a04c42a7bfce508ed1eb78c4ec421da5038fbaf89086cee8d0b04998d223568df05e8640679f
@@ -28,6 +28,105 @@ module Braintrust
       Braintrust::Trace.parse_openai_usage_tokens(usage)
     end
 
+    # Aggregate streaming chunks into a single response structure
+    # @param chunks [Array<Hash>] array of chunk hashes from stream
+    # @return [Hash] aggregated response with choices, usage, id, created, model
+    def self.aggregate_streaming_chunks(chunks)
+      return {} if chunks.empty?
+
+      # Initialize aggregated structure
+      aggregated = {
+        "id" => nil,
+        "created" => nil,
+        "model" => nil,
+        "usage" => nil,
+        "choices" => []
+      }
+
+      # Track aggregated content for the first choice
+      role = nil
+      content = +""
+
+      chunks.each do |chunk|
+        # Capture top-level fields from any chunk that has them
+        aggregated["id"] ||= chunk["id"]
+        aggregated["created"] ||= chunk["created"]
+        aggregated["model"] ||= chunk["model"]
+
+        # Aggregate usage (usually only in last chunk if stream_options.include_usage is set)
+        aggregated["usage"] = chunk["usage"] if chunk["usage"]
+
+        # Aggregate content from first choice
+        if chunk.dig("choices", 0, "delta", "role")
+          role ||= chunk.dig("choices", 0, "delta", "role")
+        end
+        if chunk.dig("choices", 0, "delta", "content")
+          content << chunk.dig("choices", 0, "delta", "content")
+        end
+      end
+
+      # Build aggregated choices array
+      aggregated["choices"] = [
+        {
+          "index" => 0,
+          "message" => {
+            "role" => role || "assistant",
+            "content" => content
+          },
+          "finish_reason" => chunks.dig(-1, "choices", 0, "finish_reason")
+        }
+      ]
+
+      aggregated
+    end
+
+    # Aggregate responses streaming chunks into a single response structure
+    # @param chunks [Array<Hash>] array of chunk hashes from stream
+    # @return [Hash] aggregated response with output, usage, id
+    def self.aggregate_responses_chunks(chunks)
+      return {} if chunks.empty?
+
+      # Find the response.completed event which has the final response
+      completed_chunk = chunks.find { |c| c["type"] == "response.completed" }
+
+      if completed_chunk && completed_chunk["response"]
+        response = completed_chunk["response"]
+        return {
+          "id" => response["id"],
+          "output" => response["output"],
+          "usage" => response["usage"]
+        }
+      end
+
+      # Fallback if no completed event found
+      {}
+    end
+
+    # Set span attributes from response data (works for both streaming and non-streaming)
+    # @param span [OpenTelemetry::Trace::Span] the span to set attributes on
+    # @param response_data [Hash] response hash with keys: choices, usage, id, created, model, system_fingerprint, service_tier
+    # @param time_to_first_token [Float] time to first token in seconds
+    # @param metadata [Hash] metadata hash to update with response fields
+    def self.set_span_attributes(span, response_data, time_to_first_token, metadata)
+      # Set output (choices) as JSON
+      if response_data["choices"]&.any?
+        set_json_attr(span, "braintrust.output_json", response_data["choices"])
+      end
+
+      # Set metrics (token usage + time_to_first_token)
+      metrics = {}
+      if response_data["usage"]
+        metrics = parse_usage_tokens(response_data["usage"])
+      end
+      metrics["time_to_first_token"] = time_to_first_token || 0.0
+      set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
+
+      # Update metadata with response fields
+      %w[id created model system_fingerprint service_tier].each do |field|
+        metadata[field] = response_data[field] if response_data[field]
+      end
+    end
+
     # Wrap an OpenAI::Client (ruby-openai gem) to automatically create spans
     # Supports both synchronous and streaming requests
     # @param client [OpenAI::Client] the OpenAI client to wrap
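
To illustrate the aggregation above: a minimal sketch, assuming chunk hashes shaped like OpenAI's Chat Completions streaming deltas, and using the module path that appears elsewhere in this diff.

    require "braintrust"

    chunks = [
      {"id" => "chatcmpl-1", "model" => "gpt-4o-mini",
       "choices" => [{"delta" => {"role" => "assistant"}}]},
      {"choices" => [{"delta" => {"content" => "Hello"}}]},
      {"choices" => [{"delta" => {"content" => ", world"}, "finish_reason" => "stop"}],
       "usage" => {"prompt_tokens" => 5, "completion_tokens" => 2, "total_tokens" => 7}}
    ]

    agg = Braintrust::Trace::Contrib::Github::Alexrudall::RubyOpenAI.aggregate_streaming_chunks(chunks)
    agg["choices"][0]["message"]["content"]  # => "Hello, world"
    agg["usage"]["total_tokens"]             # => 7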
@@ -35,92 +134,235 @@ module Braintrust
     def self.wrap(client, tracer_provider: nil)
       tracer_provider ||= ::OpenTelemetry.tracer_provider
 
+      # Store tracer provider on the client for use by wrapper modules
+      client.instance_variable_set(:@braintrust_tracer_provider, tracer_provider)
+
       # Wrap chat completions
-      wrap_chat(client, tracer_provider)
+      wrap_chat(client)
+
+      # Wrap responses API if available
+      wrap_responses(client) if client.respond_to?(:responses)
 
       client
     end
 
     # Wrap chat API
     # @param client [OpenAI::Client] the OpenAI client
-    # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider
-    def self.wrap_chat(client, tracer_provider)
-      # Create a wrapper module that intercepts the chat method
-      wrapper = Module.new do
-        define_method(:chat) do |parameters:|
-          tracer = tracer_provider.tracer("braintrust")
-
-          tracer.in_span("openai.chat.completions.create") do |span|
-            # Initialize metadata hash
-            metadata = {
-              "provider" => "openai",
-              "endpoint" => "/v1/chat/completions"
-            }
-
-            # Capture request metadata fields
-            metadata_fields = %w[
-              model frequency_penalty logit_bias logprobs max_tokens n
-              presence_penalty response_format seed service_tier stop
-              stream stream_options temperature top_p top_logprobs
-              tools tool_choice parallel_tool_calls user functions function_call
-            ]
-
-            metadata_fields.each do |field|
-              field_sym = field.to_sym
-              if parameters.key?(field_sym)
-                # Special handling for stream parameter (it's a Proc)
-                metadata[field] = if field == "stream"
-                  true # Just mark as streaming
-                else
-                  parameters[field_sym]
+    def self.wrap_chat(client)
+      client.singleton_class.prepend(ChatWrapper)
+    end
+
+    # Wrap responses API
+    # @param client [OpenAI::Client] the OpenAI client
+    def self.wrap_responses(client)
+      # Store tracer provider on the responses object for use by wrapper module
+      responses_obj = client.responses
+      responses_obj.instance_variable_set(:@braintrust_tracer_provider, client.instance_variable_get(:@braintrust_tracer_provider))
+      responses_obj.singleton_class.prepend(ResponsesCreateWrapper)
+    end
+
+    # Wrapper module for chat completions
+    module ChatWrapper
+      def chat(parameters:)
+        tracer_provider = @braintrust_tracer_provider
+        tracer = tracer_provider.tracer("braintrust")
+
+        tracer.in_span("Chat Completion") do |span|
+          # Track start time for time_to_first_token
+          start_time = Time.now
+          time_to_first_token = nil
+          is_streaming = parameters.key?(:stream) && parameters[:stream].is_a?(Proc)
+
+          # Initialize metadata hash
+          metadata = {
+            "provider" => "openai",
+            "endpoint" => "/v1/chat/completions"
+          }
+
+          # Capture request metadata fields
+          metadata_fields = %w[
+            model frequency_penalty logit_bias logprobs max_tokens n
+            presence_penalty response_format seed service_tier stop
+            stream stream_options temperature top_p top_logprobs
+            tools tool_choice parallel_tool_calls user functions function_call
+          ]
+
+          metadata_fields.each do |field|
+            field_sym = field.to_sym
+            if parameters.key?(field_sym)
+              # Special handling for stream parameter (it's a Proc)
+              metadata[field] = if field == "stream"
+                true # Just mark as streaming
+              else
+                parameters[field_sym]
               end
             end
+          end
+
+          # Set input messages as JSON
+          if parameters[:messages]
+            RubyOpenAI.set_json_attr(span, "braintrust.input_json", parameters[:messages])
+          end
 
-            # Set input messages as JSON
-            if parameters[:messages]
-              span.set_attribute("braintrust.input_json", JSON.generate(parameters[:messages]))
+          # Wrap streaming callback if present to capture time to first token and aggregate chunks
+          aggregated_chunks = []
+          if is_streaming
+            original_stream_proc = parameters[:stream]
+            parameters = parameters.dup
+            parameters[:stream] = proc do |chunk, bytesize|
+              # Capture time to first token on first chunk
+              time_to_first_token ||= Time.now - start_time
+              # Aggregate chunks for later processing
+              aggregated_chunks << chunk
+              # Call original callback
+              original_stream_proc.call(chunk, bytesize)
             end
+          end
+
+          begin
+            # Call the original method
+            response = super(parameters: parameters)
+
+            # Calculate time to first token for non-streaming
+            time_to_first_token ||= Time.now - start_time unless is_streaming
+
+            # Process response data
+            if is_streaming && !aggregated_chunks.empty?
+              # Aggregate streaming chunks into response-like structure
+              aggregated_response = RubyOpenAI.aggregate_streaming_chunks(aggregated_chunks)
+              RubyOpenAI.set_span_attributes(span, aggregated_response, time_to_first_token, metadata)
+            else
+              # Non-streaming: use response object directly
+              RubyOpenAI.set_span_attributes(span, response || {}, time_to_first_token, metadata)
+            end
+
+            # Set metadata ONCE at the end with complete hash
+            RubyOpenAI.set_json_attr(span, "braintrust.metadata", metadata)
 
-            begin
-              # Call the original method
-              response = super(parameters: parameters)
+            response
+          rescue => e
+            # Record exception in span
+            span.record_exception(e)
+            span.status = OpenTelemetry::Trace::Status.error("Exception: #{e.class} - #{e.message}")
+            raise
+          end
+        end
+      end
+    end
+
+    # Wrapper module for responses API create method
+    module ResponsesCreateWrapper
+      def create(parameters:)
+        tracer_provider = @braintrust_tracer_provider
+        tracer = tracer_provider.tracer("braintrust")
 
-              # Set output (choices) as JSON
-              if response && response["choices"]&.any?
-                span.set_attribute("braintrust.output_json", JSON.generate(response["choices"]))
+        tracer.in_span("openai.responses.create") do |span|
+          # Track start time for time_to_first_token
+          start_time = Time.now
+          time_to_first_token = nil
+          is_streaming = parameters.key?(:stream) && parameters[:stream].is_a?(Proc)
+
+          # Initialize metadata hash
+          metadata = {
+            "provider" => "openai",
+            "endpoint" => "/v1/responses"
+          }
+
+          # Capture request metadata fields
+          metadata_fields = %w[
+            model instructions modalities tools parallel_tool_calls
+            tool_choice temperature max_tokens top_p frequency_penalty
+            presence_penalty seed user store response_format
+            reasoning previous_response_id truncation
+          ]
+
+          metadata_fields.each do |field|
+            field_sym = field.to_sym
+            if parameters.key?(field_sym)
+              metadata[field] = parameters[field_sym]
+            end
+          end
+
+          # Mark as streaming if applicable
+          metadata["stream"] = true if is_streaming
+
+          # Set input as JSON
+          if parameters[:input]
+            RubyOpenAI.set_json_attr(span, "braintrust.input_json", parameters[:input])
+          end
+
+          # Wrap streaming callback if present to capture time to first token and aggregate chunks
+          aggregated_chunks = []
+          if is_streaming
+            original_stream_proc = parameters[:stream]
+            parameters = parameters.dup
+            parameters[:stream] = proc do |chunk, event|
+              # Capture time to first token on first chunk
+              time_to_first_token ||= Time.now - start_time
+              # Aggregate chunks for later processing
+              aggregated_chunks << chunk
+              # Call original callback
+              original_stream_proc.call(chunk, event)
+            end
+          end
+
+          begin
+            # Call the original method
+            response = super(parameters: parameters)
+
+            # Calculate time to first token for non-streaming
+            time_to_first_token ||= Time.now - start_time unless is_streaming
+
+            # Process response data
+            if is_streaming && !aggregated_chunks.empty?
+              # Aggregate streaming chunks into response-like structure
+              aggregated_response = RubyOpenAI.aggregate_responses_chunks(aggregated_chunks)
+
+              # Set output as JSON
+              if aggregated_response["output"]
+                RubyOpenAI.set_json_attr(span, "braintrust.output_json", aggregated_response["output"])
               end
 
-              # Set metrics (token usage)
-              if response && response["usage"]
-                metrics = Braintrust::Trace::Contrib::Github::Alexrudall::RubyOpenAI.parse_usage_tokens(response["usage"])
-                span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty?
+              # Set metrics (token usage + time_to_first_token)
+              metrics = {}
+              if aggregated_response["usage"]
+                metrics = RubyOpenAI.parse_usage_tokens(aggregated_response["usage"])
               end
+              metrics["time_to_first_token"] = time_to_first_token || 0.0
+              RubyOpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
 
-              # Add response metadata fields
-              if response
-                metadata["id"] = response["id"] if response["id"]
-                metadata["created"] = response["created"] if response["created"]
-                metadata["system_fingerprint"] = response["system_fingerprint"] if response["system_fingerprint"]
-                metadata["service_tier"] = response["service_tier"] if response["service_tier"]
+              # Update metadata with response fields
+              metadata["id"] = aggregated_response["id"] if aggregated_response["id"]
+            else
+              # Non-streaming: use response object directly
+              if response && response["output"]
+                RubyOpenAI.set_json_attr(span, "braintrust.output_json", response["output"])
               end
 
-              # Set metadata ONCE at the end with complete hash
-              span.set_attribute("braintrust.metadata", JSON.generate(metadata))
+              # Set metrics (token usage + time_to_first_token)
+              metrics = {}
+              if response && response["usage"]
+                metrics = RubyOpenAI.parse_usage_tokens(response["usage"])
+              end
+              metrics["time_to_first_token"] = time_to_first_token || 0.0
+              RubyOpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
 
-              response
-            rescue => e
-              # Record exception in span
-              span.record_exception(e)
-              span.status = OpenTelemetry::Trace::Status.error("Exception: #{e.class} - #{e.message}")
-              raise
+              # Update metadata with response fields
+              metadata["id"] = response["id"] if response && response["id"]
             end
+
+            # Set metadata ONCE at the end with complete hash
+            RubyOpenAI.set_json_attr(span, "braintrust.metadata", metadata)
+
+            response
+          rescue => e
+            # Record exception in span
+            span.record_exception(e)
+            span.status = OpenTelemetry::Trace::Status.error("Exception: #{e.class} - #{e.message}")
+            raise
           end
         end
       end
     end
-
-    # Prepend the wrapper to the client's singleton class
-    client.singleton_class.prepend(wrapper)
   end
 end
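
A usage sketch for the wrapper above, assuming the alexrudall/ruby-openai gem, whose streaming callback receives (chunk, bytesize) as shown in the diff:

    require "openai"      # alexrudall/ruby-openai
    require "braintrust"

    client = OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"])
    Braintrust::Trace::Contrib::Github::Alexrudall::RubyOpenAI.wrap(client)

    # Streaming: the wrapper records time_to_first_token on the first chunk,
    # aggregates all chunks for the span, and still invokes this callback unchanged.
    client.chat(parameters: {
      model: "gpt-4o-mini",
      messages: [{role: "user", content: "Say hello"}],
      stream: proc { |chunk, _bytesize| print chunk.dig("choices", 0, "delta", "content") }
    })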
@@ -71,15 +71,59 @@ module Braintrust
       # Check if already wrapped to make this idempotent
       return chat if chat.instance_variable_get(:@braintrust_wrapped)
 
-      # Create a wrapper module that intercepts chat.ask
+      # Create a wrapper module that intercepts chat.complete
       wrapper = create_wrapper_module(tracer_provider)
 
       # Mark as wrapped and prepend the wrapper to the chat instance
       chat.instance_variable_set(:@braintrust_wrapped, true)
       chat.singleton_class.prepend(wrapper)
+
+      # Register tool callbacks for tool span creation
+      register_tool_callbacks(chat, tracer_provider)
+
       chat
     end
 
+    # Register callbacks for tool execution tracing
+    # @param chat [RubyLLM::Chat] the chat instance
+    # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider
+    def self.register_tool_callbacks(chat, tracer_provider)
+      tracer = tracer_provider.tracer("braintrust")
+
+      # Track tool spans by tool_call_id
+      tool_spans = {}
+
+      # Start tool span when tool is called
+      chat.on_tool_call do |tool_call|
+        span = tracer.start_span("ruby_llm.tool.#{tool_call.name}")
+        set_json_attr(span, "braintrust.span_attributes", {type: "tool"})
+        span.set_attribute("tool.name", tool_call.name)
+        span.set_attribute("tool.call_id", tool_call.id)
+
+        # Store tool input
+        input = {
+          "name" => tool_call.name,
+          "arguments" => tool_call.arguments
+        }
+        set_json_attr(span, "braintrust.input_json", input)
+
+        tool_spans[tool_call.id] = span
+      end
+
+      # End tool span when result is received
+      chat.on_tool_result do |result|
+        # Find the most recent tool span (RubyLLM doesn't pass tool_call_id to on_tool_result)
+        # The spans are processed in order, so we can use the first unfinished one
+        tool_call_id, span = tool_spans.find { |_id, s| s }
+        if span
+          # Store tool output
+          set_json_attr(span, "braintrust.output_json", result)
+          span.finish
+          tool_spans.delete(tool_call_id)
+        end
+      end
+    end
+
     # Unwrap RubyLLM to remove Braintrust tracing
     # For class-level unwrapping, removes the initialize override from the wrapper module
     # For instance-level unwrapping, clears the wrapped flag
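
A hedged sketch of the callbacks above in use, assuming RubyLLM's documented chat and tool API and that this module's wrap entry point takes the chat instance; WeatherTool is a hypothetical RubyLLM::Tool subclass:

    require "ruby_llm"
    require "braintrust"

    chat = RubyLLM.chat(model: "gpt-4o-mini").with_tool(WeatherTool)
    Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat)

    # Each tool execution opens a span named "ruby_llm.tool.<name>" with the
    # call's arguments as input and the tool's return value as output.
    chat.ask("What's the weather in Paris?")

Note that because on_tool_result carries no tool_call_id, span pairing assumes results arrive in call order; parallel tool calls could mismatch.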
@@ -116,50 +160,75 @@ module Braintrust
       ::RubyLLM::Chat.prepend(wrapper)
     end
 
-    # Create the wrapper module that intercepts chat.ask
+    # Create the wrapper module that intercepts chat.complete
+    # We wrap complete() instead of ask() because:
+    # - ask() internally calls complete() for the actual API call
+    # - ActiveRecord integration (acts_as_chat) calls complete() directly
+    # - This ensures all LLM calls are traced regardless of entry point
+    #
+    # Important: RubyLLM's complete() calls itself recursively for tool execution.
+    # We only create a span for the outermost call to avoid duplicate spans.
+    # Tool execution is traced separately via on_tool_call/on_tool_result callbacks.
+    #
     # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider
     # @return [Module] the wrapper module
     def self.create_wrapper_module(tracer_provider)
       Module.new do
-        define_method(:ask) do |prompt = nil, **params, &block|
+        define_method(:complete) do |&block|
+          # Check if we're already inside a traced complete() call
+          # If so, just call super without creating a new span
+          if @braintrust_in_complete
+            if block
+              return super(&block)
+            else
+              return super()
+            end
+          end
+
           tracer = tracer_provider.tracer("braintrust")
 
-          if block
-            # Handle streaming request
-            wrapped_block = proc do |chunk|
-              block.call(chunk)
-            end
-            Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.handle_streaming_ask(self, tracer, prompt, params, block) do |aggregated_chunks|
-              super(prompt, **params) do |chunk|
-                aggregated_chunks << chunk
-                wrapped_block.call(chunk)
+          # Mark that we're inside a complete() call
+          @braintrust_in_complete = true
+
+          begin
+            if block
+              # Handle streaming request
+              wrapped_block = proc do |chunk|
+                block.call(chunk)
+              end
+              Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.handle_streaming_complete(self, tracer, block) do |aggregated_chunks|
+                super(&proc do |chunk|
+                  aggregated_chunks << chunk
+                  wrapped_block.call(chunk)
+                end)
+              end
+            else
+              # Handle non-streaming request
+              Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.handle_non_streaming_complete(self, tracer) do
+                super()
               end
             end
-          else
-            # Handle non-streaming request
-            Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.handle_non_streaming_ask(self, tracer, prompt, params) do
-              super(prompt, **params)
-            end
+          ensure
+            @braintrust_in_complete = false
           end
         end
       end
     end
 
-    # Handle streaming chat request with tracing
+    # Handle streaming complete request with tracing
     # @param chat [RubyLLM::Chat] the chat instance
     # @param tracer [OpenTelemetry::Trace::Tracer] the tracer
-    # @param prompt [String, nil] the user prompt
-    # @param params [Hash] additional parameters
     # @param block [Proc] the streaming block
-    def self.handle_streaming_ask(chat, tracer, prompt, params, block)
+    def self.handle_streaming_complete(chat, tracer, block)
       # Start span immediately for accurate timing
-      span = tracer.start_span("ruby_llm.chat.ask")
+      span = tracer.start_span("ruby_llm.chat")
 
       aggregated_chunks = []
 
       # Extract metadata and build input messages
+      # For complete(), messages are already in chat history (no prompt param)
       metadata = extract_metadata(chat, stream: true)
-      input_messages = build_input_messages(chat, prompt)
+      input_messages = build_input_messages(chat, nil)
 
       # Set input and metadata
       set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any?
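
One consequence of wrapping complete() rather than ask(): Rails chats persisted through RubyLLM's acts_as_chat integration are traced too, since they call complete() directly. A sketch, assuming the model class name from RubyLLM's Rails documentation:

    class Chat < ApplicationRecord
      acts_as_chat
    end

    chat = Chat.create!(model_id: "gpt-4o-mini")
    chat.ask("Hello")  # persists the message, then calls complete(), traced once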
@@ -181,19 +250,18 @@ module Braintrust
       result
     end
 
-    # Handle non-streaming chat request with tracing
+    # Handle non-streaming complete request with tracing
     # @param chat [RubyLLM::Chat] the chat instance
     # @param tracer [OpenTelemetry::Trace::Tracer] the tracer
-    # @param prompt [String, nil] the user prompt
-    # @param params [Hash] additional parameters
-    def self.handle_non_streaming_ask(chat, tracer, prompt, params)
+    def self.handle_non_streaming_complete(chat, tracer)
       # Start span immediately for accurate timing
-      span = tracer.start_span("ruby_llm.chat.ask")
+      span = tracer.start_span("ruby_llm.chat")
 
       begin
         # Extract metadata and build input messages
+        # For complete(), messages are already in chat history (no prompt param)
         metadata = extract_metadata(chat)
-        input_messages = build_input_messages(chat, prompt)
+        input_messages = build_input_messages(chat, nil)
         set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any?
 
         # Remember message count before the call (for tool call detection)
@@ -321,23 +389,62 @@ module Braintrust
     end
 
     # Build input messages array from chat history and prompt
+    # Formats messages to match OpenAI's message format
     # @param chat [RubyLLM::Chat] the chat instance
     # @param prompt [String, nil] the user prompt
     # @return [Array<Hash>] array of message hashes
     def self.build_input_messages(chat, prompt)
       input_messages = []
 
-      # Add conversation history
+      # Add conversation history, formatting each message to OpenAI format
       if chat.respond_to?(:messages) && chat.messages&.any?
-        input_messages = chat.messages.map { |m| m.respond_to?(:to_h) ? m.to_h : m }
+        input_messages = chat.messages.map { |m| format_message_for_input(m) }
       end
 
       # Add current prompt
-      input_messages << {role: "user", content: prompt} if prompt
+      input_messages << {"role" => "user", "content" => prompt} if prompt
 
       input_messages
     end
 
+    # Format a RubyLLM message to OpenAI-compatible format
+    # @param msg [Object] the RubyLLM message
+    # @return [Hash] OpenAI-formatted message
+    def self.format_message_for_input(msg)
+      formatted = {
+        "role" => msg.role.to_s
+      }
+
+      # Handle content
+      if msg.respond_to?(:content) && msg.content
+        # Convert Ruby hash notation to JSON string for tool results
+        content = msg.content
+        if msg.role.to_s == "tool" && content.is_a?(String) && content.start_with?("{:")
+          # Ruby hash string like "{:location=>...}" - try to parse and re-serialize as JSON
+          begin
+            # Simple conversion: replace Ruby hash syntax with JSON
+            content = content.gsub(/(?<=\{|, ):(\w+)=>/, '"\1":').gsub("=>", ":")
+          rescue
+            # Keep original if conversion fails
+          end
+        end
+        formatted["content"] = content
+      end
+
+      # Handle tool_calls for assistant messages
+      if msg.respond_to?(:tool_calls) && msg.tool_calls&.any?
+        formatted["tool_calls"] = format_tool_calls(msg.tool_calls)
+        formatted["content"] = nil
+      end
+
+      # Handle tool_call_id for tool result messages
+      if msg.respond_to?(:tool_call_id) && msg.tool_call_id
+        formatted["tool_call_id"] = msg.tool_call_id
+      end
+
+      formatted
+    end
+
     # Capture streaming output and metrics
     # @param span [OpenTelemetry::Trace::Span] the span
     # @param aggregated_chunks [Array] the aggregated chunks
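
The tool-content branch above rewrites Ruby inspect-style hash strings with a regex rather than a real parser; a sketch of what the substitution does:

    content = '{:location=>"Paris", :temp=>21}'
    content.gsub(/(?<=\{|, ):(\w+)=>/, '"\1":').gsub("=>", ":")
    # => '{"location":"Paris", "temp":21}'

It covers flat symbol-keyed hashes; nested structures or string keys would pass through only partially converted, which is why the original content is kept on failure.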
@@ -383,8 +490,9 @@ module Braintrust
       end
 
       # Check if there are tool calls in the messages history
+      # Look at messages added during this complete() call
       if chat.respond_to?(:messages) && chat.messages
-        assistant_msg = chat.messages[(messages_before_count + 1)..].find { |m|
+        assistant_msg = chat.messages[messages_before_count..].find { |m|
           m.role.to_s == "assistant" && m.respond_to?(:tool_calls) && m.tool_calls&.any?
         }
 
@@ -155,7 +155,10 @@ module Braintrust
       define_method(:create) do |**params|
         tracer = tracer_provider.tracer("braintrust")
 
-        tracer.in_span("openai.chat.completions.create") do |span|
+        tracer.in_span("Chat Completion") do |span|
+          # Track start time for time_to_first_token
+          start_time = Time.now
+
           # Initialize metadata hash
           metadata = {
             "provider" => "openai",
@@ -184,6 +187,9 @@ module Braintrust
           # Call the original method
           response = super(**params)
 
+          # Calculate time to first token
+          time_to_first_token = Time.now - start_time
+
           # Set output (choices) as JSON
           # Use to_h to get the raw structure with all fields (including tool_calls)
           if response.respond_to?(:choices) && response.choices&.any?
@@ -192,10 +198,13 @@ module Braintrust
           end
 
           # Set metrics (token usage with advanced details)
+          metrics = {}
           if response.respond_to?(:usage) && response.usage
             metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(response.usage)
-            span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty?
           end
+          # Add time_to_first_token metric
+          metrics["time_to_first_token"] = time_to_first_token
+          span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty?
 
           # Add response metadata fields
           metadata["id"] = response.id if response.respond_to?(:id) && response.id
@@ -214,13 +223,15 @@ module Braintrust
       define_method(:stream_raw) do |**params|
         tracer = tracer_provider.tracer("braintrust")
         aggregated_chunks = []
+        start_time = Time.now
+        time_to_first_token = nil
         metadata = {
           "provider" => "openai",
           "endpoint" => "/v1/chat/completions"
         }
 
         # Start span with proper context (will be child of current span if any)
-        span = tracer.start_span("openai.chat.completions.create")
+        span = tracer.start_span("Chat Completion")
 
         # Capture request metadata fields
         metadata_fields = %i[
@@ -259,6 +270,8 @@ module Braintrust
         original_each = stream.method(:each)
         stream.define_singleton_method(:each) do |&block|
           original_each.call do |chunk|
+            # Capture time to first token on first chunk
+            time_to_first_token ||= Time.now - start_time
             aggregated_chunks << chunk.to_h
             block&.call(chunk)
           end
@@ -275,10 +288,13 @@ module Braintrust
           Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.output_json", aggregated_output[:choices])
 
           # Set metrics if usage is included (requires stream_options.include_usage)
+          metrics = {}
           if aggregated_output[:usage]
             metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(aggregated_output[:usage])
-            Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
           end
+          # Add time_to_first_token metric
+          metrics["time_to_first_token"] = time_to_first_token || 0.0
+          Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
 
           # Update metadata with response fields
           metadata["id"] = aggregated_output[:id] if aggregated_output[:id]
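
Note the prerequisite the comment calls out: raw streams only carry usage when requested. A hedged sketch of a caller that opts in, assuming the official openai gem whose stream_raw method is wrapped above (the exact parameter shape follows OpenAI's API):

    stream = client.chat.completions.stream_raw(
      model: "gpt-4o-mini",
      messages: [{role: "user", content: "Hi"}],
      stream_options: {include_usage: true}  # final chunk then includes usage
    )
    stream.each { |chunk| print(chunk.to_h.dig(:choices, 0, :delta, :content) || "") }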
@@ -297,13 +313,15 @@ module Braintrust
       # Wrap stream for streaming chat completions (returns ChatCompletionStream with convenience methods)
       define_method(:stream) do |**params|
         tracer = tracer_provider.tracer("braintrust")
+        start_time = Time.now
+        time_to_first_token = nil
         metadata = {
           "provider" => "openai",
           "endpoint" => "/v1/chat/completions"
         }
 
         # Start span with proper context (will be child of current span if any)
-        span = tracer.start_span("openai.chat.completions.create")
+        span = tracer.start_span("Chat Completion")
 
         # Capture request metadata fields
         metadata_fields = %i[
@@ -354,10 +372,13 @@ module Braintrust
           end
 
           # Set metrics if usage is available
+          metrics = {}
           if snapshot.usage
             metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(snapshot.usage)
-            set_json_attr.call("braintrust.metrics", metrics) unless metrics.empty?
           end
+          # Add time_to_first_token metric
+          metrics["time_to_first_token"] = time_to_first_token || 0.0
+          set_json_attr.call("braintrust.metrics", metrics) unless metrics.empty?
 
           # Update metadata with response fields
           metadata["id"] = snapshot.id if snapshot.respond_to?(:id) && snapshot.id
@@ -378,7 +399,11 @@ module Braintrust
         # Wrap .each() method - this is the core consumption method
         original_each = stream.method(:each)
         stream.define_singleton_method(:each) do |&block|
-          original_each.call(&block)
+          original_each.call do |chunk|
+            # Capture time to first token on first chunk
+            time_to_first_token ||= Time.now - start_time
+            block&.call(chunk)
+          end
         rescue => e
           span.record_exception(e)
           span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}")
@@ -392,8 +417,13 @@ module Braintrust
         stream.define_singleton_method(:text) do
           text_enum = original_text.call
           # Wrap the returned enumerable's .each method
+          original_text_each = text_enum.method(:each)
           text_enum.define_singleton_method(:each) do |&block|
-            super(&block)
+            original_text_each.call do |delta|
+              # Capture time to first token on first delta
+              time_to_first_token ||= Time.now - start_time
+              block&.call(delta)
+            end
           rescue => e
             span.record_exception(e)
             span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}")
@@ -14,16 +14,24 @@ module Braintrust
       return metrics unless usage_hash.is_a?(Hash)
 
       # Field mappings: OpenAI → Braintrust
+      # Supports both Chat Completions API (prompt_tokens, completion_tokens)
+      # and Responses API (input_tokens, output_tokens)
       field_map = {
         "prompt_tokens" => "prompt_tokens",
         "completion_tokens" => "completion_tokens",
-        "total_tokens" => "tokens"
+        "total_tokens" => "tokens",
+        # Responses API uses different field names
+        "input_tokens" => "prompt_tokens",
+        "output_tokens" => "completion_tokens"
       }
 
       # Prefix mappings for *_tokens_details
       prefix_map = {
         "prompt" => "prompt",
-        "completion" => "completion"
+        "completion" => "completion",
+        # Responses API uses input/output prefixes
+        "input" => "prompt",
+        "output" => "completion"
       }
 
       usage_hash.each do |key, value|
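
With the mappings above, both usage shapes normalize to the same Braintrust metric names. A sketch, assuming the parse entry point seen in the calls elsewhere in this diff:

    require "braintrust"

    chat_usage      = {"prompt_tokens" => 12, "completion_tokens" => 34, "total_tokens" => 46}
    responses_usage = {"input_tokens" => 12, "output_tokens" => 34, "total_tokens" => 46}

    Braintrust::Trace::OpenAI.parse_usage_tokens(chat_usage)
    # => {"prompt_tokens"=>12, "completion_tokens"=>34, "tokens"=>46}
    Braintrust::Trace::OpenAI.parse_usage_tokens(responses_usage)
    # same result, via the input_/output_ mappings added above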
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Braintrust
-  VERSION = "0.0.6"
+  VERSION = "0.0.8"
 end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: braintrust
 version: !ruby/object:Gem::Version
-  version: 0.0.6
+  version: 0.0.8
 platform: ruby
 authors:
 - Braintrust