braintrust 0.0.4 → 0.0.6
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/README.md +31 -1
- data/lib/braintrust/state.rb +21 -3
- data/lib/braintrust/trace/contrib/anthropic.rb +85 -208
- data/lib/braintrust/trace/contrib/github.com/alexrudall/ruby-openai/ruby-openai.rb +135 -0
- data/lib/braintrust/trace/contrib/github.com/crmne/ruby_llm.rb +447 -0
- data/lib/braintrust/trace/contrib/openai.rb +121 -68
- data/lib/braintrust/trace/tokens.rb +101 -0
- data/lib/braintrust/trace.rb +38 -3
- data/lib/braintrust/version.rb +1 -1
- metadata +6 -3
data/lib/braintrust/trace/contrib/github.com/crmne/ruby_llm.rb (new file):

@@ -0,0 +1,447 @@
+# frozen_string_literal: true
+
+require "opentelemetry/sdk"
+require "json"
+require_relative "../../../tokens"
+require_relative "../../../../logger"
+
+module Braintrust
+  module Trace
+    module Contrib
+      module Github
+        module Crmne
+          module RubyLLM
+            # Helper to safely set a JSON attribute on a span
+            # Only sets the attribute if obj is present
+            # @param span [OpenTelemetry::Trace::Span] the span to set attribute on
+            # @param attr_name [String] the attribute name (e.g., "braintrust.output_json")
+            # @param obj [Object] the object to serialize to JSON
+            # @return [void]
+            def self.set_json_attr(span, attr_name, obj)
+              return unless obj
+              span.set_attribute(attr_name, JSON.generate(obj))
+            rescue => e
+              Log.debug("Failed to serialize #{attr_name}: #{e.message}")
+            end
+
+            # Parse usage tokens from RubyLLM response
+            # RubyLLM uses Anthropic-style field naming (input_tokens, output_tokens)
+            # @param usage [Hash, Object] usage object from RubyLLM response
+            # @return [Hash<String, Integer>] metrics hash with normalized names
+            def self.parse_usage_tokens(usage)
+              Braintrust::Trace.parse_anthropic_usage_tokens(usage)
+            end
+
+            # Wrap RubyLLM to automatically create spans for chat requests
+            # Supports both synchronous and streaming requests
+            #
+            # Usage:
+            #   # Wrap the class once (affects all future instances):
+            #   Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap
+            #
+            #   # Or wrap a specific instance:
+            #   chat = RubyLLM.chat(model: "gpt-4o-mini")
+            #   Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat)
+            #
+            # @param chat [RubyLLM::Chat, nil] the RubyLLM chat instance to wrap (if nil, wraps the class)
+            # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider (defaults to global)
+            def self.wrap(chat = nil, tracer_provider: nil)
+              tracer_provider ||= ::OpenTelemetry.tracer_provider
+
+              # If no chat instance provided, wrap the class globally via initialize hook
+              if chat.nil?
+                return if defined?(::RubyLLM::Chat) && ::RubyLLM::Chat.instance_variable_defined?(:@braintrust_wrapper_module)
+
+                # Create module that wraps initialize to auto-wrap each new instance
+                wrapper_module = Module.new do
+                  define_method(:initialize) do |*args, **kwargs, &block|
+                    super(*args, **kwargs, &block)
+                    # Auto-wrap this instance during initialization
+                    Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(self, tracer_provider: tracer_provider)
+                    self
+                  end
+                end
+
+                # Store reference to wrapper module for cleanup
+                ::RubyLLM::Chat.instance_variable_set(:@braintrust_wrapper_module, wrapper_module)
+                ::RubyLLM::Chat.prepend(wrapper_module)
+                return nil
+              end
+
+              # Check if already wrapped to make this idempotent
+              return chat if chat.instance_variable_get(:@braintrust_wrapped)
+
+              # Create a wrapper module that intercepts chat.ask
+              wrapper = create_wrapper_module(tracer_provider)
+
+              # Mark as wrapped and prepend the wrapper to the chat instance
+              chat.instance_variable_set(:@braintrust_wrapped, true)
+              chat.singleton_class.prepend(wrapper)
+              chat
+            end
+
+            # Unwrap RubyLLM to remove Braintrust tracing
+            # For class-level unwrapping, removes the initialize override from the wrapper module
+            # For instance-level unwrapping, clears the wrapped flag
+            #
+            # @param chat [RubyLLM::Chat, nil] the RubyLLM chat instance to unwrap (if nil, unwraps the class)
+            def self.unwrap(chat = nil)
+              # If no chat instance provided, unwrap the class globally
+              if chat.nil?
+                if defined?(::RubyLLM::Chat) && ::RubyLLM::Chat.instance_variable_defined?(:@braintrust_wrapper_module)
+                  wrapper_module = ::RubyLLM::Chat.instance_variable_get(:@braintrust_wrapper_module)
+                  # Redefine initialize to just call super (disables auto-wrapping)
+                  # We can't actually remove a prepended module, so we make it a no-op
+                  wrapper_module.module_eval do
+                    define_method(:initialize) do |*args, **kwargs, &block|
+                      super(*args, **kwargs, &block)
+                    end
+                  end
+                  ::RubyLLM::Chat.remove_instance_variable(:@braintrust_wrapper_module)
+                end
+                return nil
+              end
+
+              # Unwrap instance
+              chat.remove_instance_variable(:@braintrust_wrapped) if chat.instance_variable_defined?(:@braintrust_wrapped)
+              chat
+            end
+
+            # Wrap the RubyLLM::Chat class globally
+            # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider
+            def self.wrap_class(tracer_provider)
+              return unless defined?(::RubyLLM::Chat)
+
+              wrapper = create_wrapper_module(tracer_provider)
+              ::RubyLLM::Chat.prepend(wrapper)
+            end
+
+            # Create the wrapper module that intercepts chat.ask
+            # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider
+            # @return [Module] the wrapper module
+            def self.create_wrapper_module(tracer_provider)
+              Module.new do
+                define_method(:ask) do |prompt = nil, **params, &block|
+                  tracer = tracer_provider.tracer("braintrust")
+
+                  if block
+                    # Handle streaming request
+                    wrapped_block = proc do |chunk|
+                      block.call(chunk)
+                    end
+                    Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.handle_streaming_ask(self, tracer, prompt, params, block) do |aggregated_chunks|
+                      super(prompt, **params) do |chunk|
+                        aggregated_chunks << chunk
+                        wrapped_block.call(chunk)
+                      end
+                    end
+                  else
+                    # Handle non-streaming request
+                    Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.handle_non_streaming_ask(self, tracer, prompt, params) do
+                      super(prompt, **params)
+                    end
+                  end
+                end
+              end
+            end
+
+            # Handle streaming chat request with tracing
+            # @param chat [RubyLLM::Chat] the chat instance
+            # @param tracer [OpenTelemetry::Trace::Tracer] the tracer
+            # @param prompt [String, nil] the user prompt
+            # @param params [Hash] additional parameters
+            # @param block [Proc] the streaming block
+            def self.handle_streaming_ask(chat, tracer, prompt, params, block)
+              # Start span immediately for accurate timing
+              span = tracer.start_span("ruby_llm.chat.ask")
+
+              aggregated_chunks = []
+
+              # Extract metadata and build input messages
+              metadata = extract_metadata(chat, stream: true)
+              input_messages = build_input_messages(chat, prompt)
+
+              # Set input and metadata
+              set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any?
+              set_json_attr(span, "braintrust.metadata", metadata)
+
+              # Call original method, passing aggregated_chunks to the block
+              begin
+                result = yield aggregated_chunks
+              rescue => e
+                span.record_exception(e)
+                span.status = ::OpenTelemetry::Trace::Status.error("RubyLLM error: #{e.message}")
+                span.finish
+                raise
+              end
+
+              # Set output and metrics from aggregated chunks
+              capture_streaming_output(span, aggregated_chunks, result)
+              span.finish
+              result
+            end
+
+            # Handle non-streaming chat request with tracing
+            # @param chat [RubyLLM::Chat] the chat instance
+            # @param tracer [OpenTelemetry::Trace::Tracer] the tracer
+            # @param prompt [String, nil] the user prompt
+            # @param params [Hash] additional parameters
+            def self.handle_non_streaming_ask(chat, tracer, prompt, params)
+              # Start span immediately for accurate timing
+              span = tracer.start_span("ruby_llm.chat.ask")
+
+              begin
+                # Extract metadata and build input messages
+                metadata = extract_metadata(chat)
+                input_messages = build_input_messages(chat, prompt)
+                set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any?
+
+                # Remember message count before the call (for tool call detection)
+                messages_before_count = (chat.respond_to?(:messages) && chat.messages) ? chat.messages.length : 0
+
+                # Call the original method
+                response = yield
+
+                # Capture output and metrics
+                capture_non_streaming_output(span, chat, response, messages_before_count)
+
+                # Set metadata
+                set_json_attr(span, "braintrust.metadata", metadata)
+
+                response
+              ensure
+                span.finish
+              end
+            end
+
+            # Extract metadata from chat instance (provider, model, tools, stream flag)
+            # @param chat [RubyLLM::Chat] the chat instance
+            # @param stream [Boolean] whether this is a streaming request
+            # @return [Hash] metadata hash
+            def self.extract_metadata(chat, stream: false)
+              metadata = {"provider" => "ruby_llm"}
+              metadata["stream"] = true if stream
+
+              # Extract model
+              if chat.respond_to?(:model) && chat.model
+                model = chat.model.respond_to?(:id) ? chat.model.id : chat.model.to_s
+                metadata["model"] = model
+              end
+
+              # Extract tools (only for non-streaming)
+              if !stream && chat.respond_to?(:tools) && chat.tools&.any?
+                metadata["tools"] = extract_tools_metadata(chat)
+              end
+
+              metadata
+            end
+
+            # Extract tools metadata from chat instance
+            # @param chat [RubyLLM::Chat] the chat instance
+            # @return [Array<Hash>] array of tool schemas
+            def self.extract_tools_metadata(chat)
+              provider = chat.instance_variable_get(:@provider) if chat.instance_variable_defined?(:@provider)
+
+              chat.tools.map do |_name, tool|
+                format_tool_schema(tool, provider)
+              end
+            end
+
+            # Format a tool into OpenAI-compatible schema
+            # @param tool [Object] the tool object
+            # @param provider [Object, nil] the provider instance
+            # @return [Hash] tool schema
+            def self.format_tool_schema(tool, provider)
+              tool_schema = nil
+
+              # Use provider-specific tool_for method if available
+              if provider
+                begin
+                  tool_schema = if provider.is_a?(::RubyLLM::Providers::OpenAI)
+                    ::RubyLLM::Providers::OpenAI::Tools.tool_for(tool)
+                  elsif defined?(::RubyLLM::Providers::Anthropic) && provider.is_a?(::RubyLLM::Providers::Anthropic)
+                    ::RubyLLM::Providers::Anthropic::Tools.tool_for(tool)
+                  elsif tool.respond_to?(:params_schema) && tool.params_schema
+                    build_basic_tool_schema(tool)
+                  else
+                    build_minimal_tool_schema(tool)
+                  end
+                rescue NameError, ArgumentError => e
+                  # If provider-specific tool_for fails, fall back to basic format
+                  Log.debug("Failed to extract tool schema using provider-specific method: #{e.class.name}: #{e.message}")
+                  tool_schema = (tool.respond_to?(:params_schema) && tool.params_schema) ? build_basic_tool_schema(tool) : build_minimal_tool_schema(tool)
+                end
+              else
+                # No provider, use basic format with params_schema if available
+                tool_schema = (tool.respond_to?(:params_schema) && tool.params_schema) ? build_basic_tool_schema(tool) : build_minimal_tool_schema(tool)
+              end
+
+              # Strip RubyLLM-specific fields to match native OpenAI format
+              # Handle both symbol and string keys
+              function_key = tool_schema&.key?(:function) ? :function : "function"
+              if tool_schema && tool_schema[function_key]
+                tool_params = tool_schema[function_key][:parameters] || tool_schema[function_key]["parameters"]
+                if tool_params.is_a?(Hash)
+                  tool_params.delete("strict")
+                  tool_params.delete(:strict)
+                  tool_params.delete("additionalProperties")
+                  tool_params.delete(:additionalProperties)
+                end
+              end
+
+              tool_schema
+            end
+
+            # Build a basic tool schema with parameters
+            # @param tool [Object] the tool object
+            # @return [Hash] tool schema
+            def self.build_basic_tool_schema(tool)
+              {
+                "type" => "function",
+                "function" => {
+                  "name" => tool.name.to_s,
+                  "description" => tool.description,
+                  "parameters" => tool.params_schema
+                }
+              }
+            end
+
+            # Build a minimal tool schema without parameters
+            # @param tool [Object] the tool object
+            # @return [Hash] tool schema
+            def self.build_minimal_tool_schema(tool)
+              {
+                "type" => "function",
+                "function" => {
+                  "name" => tool.name.to_s,
+                  "description" => tool.description,
+                  "parameters" => {}
+                }
+              }
+            end
+
+            # Build input messages array from chat history and prompt
+            # @param chat [RubyLLM::Chat] the chat instance
+            # @param prompt [String, nil] the user prompt
+            # @return [Array<Hash>] array of message hashes
+            def self.build_input_messages(chat, prompt)
+              input_messages = []
+
+              # Add conversation history
+              if chat.respond_to?(:messages) && chat.messages&.any?
+                input_messages = chat.messages.map { |m| m.respond_to?(:to_h) ? m.to_h : m }
+              end
+
+              # Add current prompt
+              input_messages << {role: "user", content: prompt} if prompt
+
+              input_messages
+            end
+
+            # Capture streaming output and metrics
+            # @param span [OpenTelemetry::Trace::Span] the span
+            # @param aggregated_chunks [Array] the aggregated chunks
+            # @param result [Object] the result object
+            def self.capture_streaming_output(span, aggregated_chunks, result)
+              return if aggregated_chunks.empty?
+
+              # Aggregate content from chunks
+              aggregated_content = aggregated_chunks.map { |c|
+                c.respond_to?(:content) ? c.content : c.to_s
+              }.join
+
+              output = [{
+                role: "assistant",
+                content: aggregated_content
+              }]
+              set_json_attr(span, "braintrust.output_json", output)
+
+              # Try to extract usage from the result
+              if result.respond_to?(:usage) && result.usage
+                metrics = parse_usage_tokens(result.usage)
+                set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
+              end
+            end
+
+            # Capture non-streaming output and metrics
+            # @param span [OpenTelemetry::Trace::Span] the span
+            # @param chat [RubyLLM::Chat] the chat instance
+            # @param response [Object] the response object
+            # @param messages_before_count [Integer] message count before the call
+            def self.capture_non_streaming_output(span, chat, response, messages_before_count)
+              return unless response
+
+              # Build message object from response
+              message = {
+                "role" => "assistant",
+                "content" => nil
+              }
+
+              # Add content if it's a simple text response
+              if response.respond_to?(:content) && response.content && !response.content.empty?
+                message["content"] = response.content
+              end
+
+              # Check if there are tool calls in the messages history
+              if chat.respond_to?(:messages) && chat.messages
+                assistant_msg = chat.messages[(messages_before_count + 1)..].find { |m|
+                  m.role.to_s == "assistant" && m.respond_to?(:tool_calls) && m.tool_calls&.any?
+                }
+
+                if assistant_msg&.tool_calls&.any?
+                  message["tool_calls"] = format_tool_calls(assistant_msg.tool_calls)
+                  message["content"] = nil
+                end
+              end
+
+              # Format as OpenAI choices[] structure
+              output = [{
+                "index" => 0,
+                "message" => message,
+                "finish_reason" => message["tool_calls"] ? "tool_calls" : "stop"
+              }]
+
+              set_json_attr(span, "braintrust.output_json", output)
+
+              # Set metrics (token usage)
+              if response.respond_to?(:to_h)
+                response_hash = response.to_h
+                usage = {
+                  "input_tokens" => response_hash[:input_tokens],
+                  "output_tokens" => response_hash[:output_tokens],
+                  "cached_tokens" => response_hash[:cached_tokens],
+                  "cache_creation_tokens" => response_hash[:cache_creation_tokens]
+                }.compact
+
+                unless usage.empty?
+                  metrics = parse_usage_tokens(usage)
+                  set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
+                end
+              end
+            end
+
+            # Format tool calls into OpenAI format
+            # @param tool_calls [Hash, Array] the tool calls
+            # @return [Array<Hash>] formatted tool calls
+            def self.format_tool_calls(tool_calls)
+              tool_calls.map do |_id, tc|
+                # Ensure arguments is a JSON string (OpenAI format)
+                args = tc.arguments
+                args_string = args.is_a?(String) ? args : JSON.generate(args)
+
+                {
+                  "id" => tc.id,
+                  "type" => "function",
+                  "function" => {
+                    "name" => tc.name,
+                    "arguments" => args_string
+                  }
+                }
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
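The new integration's own doc comments describe two entry points: wrapping the `RubyLLM::Chat` class globally, or wrapping a single instance. A minimal usage sketch based on those comments; the model name and prompts are placeholders, and the top-level `require` path is an assumption:

```ruby
require "braintrust"  # assumed gem entry point

# Trace every future RubyLLM::Chat instance:
Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap

# ...or trace a single instance:
chat = RubyLLM.chat(model: "gpt-4o-mini")
Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat)

# Both sync and streaming asks produce a "ruby_llm.chat.ask" span:
chat.ask("What is OpenTelemetry?")
chat.ask("Summarize that.") { |chunk| print chunk.content }  # chunks respond to #content

# Remove the instrumentation:
Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.unwrap
```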
data/lib/braintrust/trace/contrib/openai.rb:

@@ -2,6 +2,7 @@
 
 require "opentelemetry/sdk"
 require "json"
+require_relative "../tokens"
 
 module Braintrust
   module Trace
@@ -17,72 +18,11 @@ module Braintrust
       span.set_attribute(attr_name, JSON.generate(obj))
     end
 
-    # Parse usage tokens from OpenAI API response
-    # Maps OpenAI field names to Braintrust standard names:
-    # - input_tokens → prompt_tokens
-    # - output_tokens → completion_tokens
-    # - total_tokens → tokens
-    # - *_tokens_details.* → prefix_*
-    #
+    # Parse usage tokens from OpenAI API response
     # @param usage [Hash, Object] usage object from OpenAI response
     # @return [Hash<String, Integer>] metrics hash with normalized names
     def self.parse_usage_tokens(usage)
-      metrics = {}
-      return metrics unless usage
-
-      # Convert to hash if it's an object
-      usage_hash = usage.respond_to?(:to_h) ? usage.to_h : usage
-
-      usage_hash.each do |key, value|
-        key_str = key.to_s
-
-        # Handle nested *_tokens_details objects
-        if key_str.end_with?("_tokens_details")
-          # Convert to hash if it's an object (OpenAI gem returns objects)
-          details_hash = value.respond_to?(:to_h) ? value.to_h : value
-          next unless details_hash.is_a?(Hash)
-
-          # Extract prefix (e.g., "prompt" from "prompt_tokens_details")
-          prefix = key_str.sub(/_tokens_details$/, "")
-          # Translate "input" → "prompt", "output" → "completion"
-          prefix = translate_metric_prefix(prefix)
-
-          # Process nested fields (e.g., cached_tokens, reasoning_tokens)
-          details_hash.each do |detail_key, detail_value|
-            next unless detail_value.is_a?(Numeric)
-            metrics["#{prefix}_#{detail_key}"] = detail_value.to_i
-          end
-        elsif value.is_a?(Numeric)
-          # Handle top-level token fields
-          case key_str
-          when "input_tokens"
-            metrics["prompt_tokens"] = value.to_i
-          when "output_tokens"
-            metrics["completion_tokens"] = value.to_i
-          when "total_tokens"
-            metrics["tokens"] = value.to_i
-          else
-            # Keep other numeric fields as-is (future-proofing)
-            metrics[key_str] = value.to_i
-          end
-        end
-      end
-
-      metrics
-    end
-
-    # Translate metric prefix to be consistent between different API formats
-    # @param prefix [String] the prefix to translate
-    # @return [String] translated prefix
-    def self.translate_metric_prefix(prefix)
-      case prefix
-      when "input"
-        "prompt"
-      when "output"
-        "completion"
-      else
-        prefix
-      end
+      Braintrust::Trace.parse_openai_usage_tokens(usage)
     end
 
     # Aggregate streaming chunks into a single response structure
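The deleted comments document the field-name normalization this method performs (input_tokens → prompt_tokens, output_tokens → completion_tokens, total_tokens → tokens, *_tokens_details.* → prefix_*); the implementation now lives in the shared data/lib/braintrust/trace/tokens.rb added in this release (see the file list above). A sketch of the mapping, assuming the shared helper preserves the deleted behavior:

```ruby
usage = {
  "input_tokens" => 12,
  "output_tokens" => 34,
  "total_tokens" => 46,
  "input_tokens_details" => {"cached_tokens" => 8}
}

Braintrust::Trace.parse_openai_usage_tokens(usage)
# => {
#   "prompt_tokens"        => 12,  # input_tokens  → prompt_tokens
#   "completion_tokens"    => 34,  # output_tokens → completion_tokens
#   "tokens"               => 46,  # total_tokens  → tokens
#   "prompt_cached_tokens" => 8    # *_tokens_details.* → prefix_*
# }
```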
@@ -124,7 +64,7 @@ module Braintrust
       choice_data[index] ||= {
         index: index,
         role: nil,
-        content: "",
+        content: +"",
         tool_calls: [],
         finish_reason: nil
       }
@@ -136,7 +76,7 @@ module Braintrust
 
         # Aggregate content
         if delta[:content]
-          choice_data[index][:content]
+          choice_data[index][:content] << delta[:content]
         end
 
         # Aggregate tool_calls (similar to Go SDK logic)
@@ -149,15 +89,15 @@ module Braintrust
             id: tool_call_delta[:id],
             type: tool_call_delta[:type],
             function: {
-              name: tool_call_delta.dig(:function, :name) || "",
-              arguments: tool_call_delta.dig(:function, :arguments) || ""
+              name: tool_call_delta.dig(:function, :name) || +"",
+              arguments: tool_call_delta.dig(:function, :arguments) || +""
            }
          }
        elsif choice_data[index][:tool_calls].any?
          # Continuation - append arguments to last tool call
          last_tool_call = choice_data[index][:tool_calls].last
          if tool_call_delta.dig(:function, :arguments)
-            last_tool_call[:function][:arguments]
+            last_tool_call[:function][:arguments] << tool_call_delta[:function][:arguments]
          end
        end
      end
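The three hunks above fix one bug class. Under `# frozen_string_literal: true` (declared at the top of the new ruby_llm.rb, and implied for openai.rb by these changes), a bare `""` literal is frozen, so appending to it raises FrozenError; unary `+` returns a mutable copy. The removed lines also evaluated the buffer without appending, silently dropping streamed content and tool-call argument fragments. A minimal illustration of the string semantics:

```ruby
# frozen_string_literal: true

buf = ""
buf << "delta"   # raises FrozenError: can't modify frozen String

buf = +""        # unary + returns a mutable (unfrozen) copy
buf << "delta"   # => "delta"
```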
@@ -353,6 +293,119 @@ module Braintrust
 
           stream
         end
+
+        # Wrap stream for streaming chat completions (returns ChatCompletionStream with convenience methods)
+        define_method(:stream) do |**params|
+          tracer = tracer_provider.tracer("braintrust")
+          metadata = {
+            "provider" => "openai",
+            "endpoint" => "/v1/chat/completions"
+          }
+
+          # Start span with proper context (will be child of current span if any)
+          span = tracer.start_span("openai.chat.completions.create")
+
+          # Capture request metadata fields
+          metadata_fields = %i[
+            model frequency_penalty logit_bias logprobs max_tokens n
+            presence_penalty response_format seed service_tier stop
+            stream stream_options temperature top_p top_logprobs
+            tools tool_choice parallel_tool_calls user functions function_call
+          ]
+
+          metadata_fields.each do |field|
+            metadata[field.to_s] = params[field] if params.key?(field)
+          end
+          metadata["stream"] = true # Explicitly mark as streaming
+
+          # Set input messages as JSON
+          if params[:messages]
+            messages_array = params[:messages].map(&:to_h)
+            span.set_attribute("braintrust.input_json", JSON.generate(messages_array))
+          end
+
+          # Set initial metadata
+          span.set_attribute("braintrust.metadata", JSON.generate(metadata))
+
+          # Call the original stream method with error handling
+          begin
+            stream = super(**params)
+          rescue => e
+            # Record exception if stream creation fails
+            span.record_exception(e)
+            span.status = ::OpenTelemetry::Trace::Status.error("OpenAI API error: #{e.message}")
+            span.finish
+            raise
+          end
+
+          # Local helper for setting JSON attributes
+          set_json_attr = ->(attr_name, obj) { Braintrust::Trace::OpenAI.set_json_attr(span, attr_name, obj) }
+
+          # Helper to extract metadata from SDK's internal snapshot
+          extract_stream_metadata = lambda do
+            # Access the SDK's internal accumulated completion snapshot
+            snapshot = stream.current_completion_snapshot
+            return unless snapshot
+
+            # Set output from accumulated choices
+            if snapshot.choices&.any?
+              choices_array = snapshot.choices.map(&:to_h)
+              set_json_attr.call("braintrust.output_json", choices_array)
+            end
+
+            # Set metrics if usage is available
+            if snapshot.usage
+              metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(snapshot.usage)
+              set_json_attr.call("braintrust.metrics", metrics) unless metrics.empty?
+            end
+
+            # Update metadata with response fields
+            metadata["id"] = snapshot.id if snapshot.respond_to?(:id) && snapshot.id
+            metadata["created"] = snapshot.created if snapshot.respond_to?(:created) && snapshot.created
+            metadata["model"] = snapshot.model if snapshot.respond_to?(:model) && snapshot.model
+            metadata["system_fingerprint"] = snapshot.system_fingerprint if snapshot.respond_to?(:system_fingerprint) && snapshot.system_fingerprint
+            set_json_attr.call("braintrust.metadata", metadata)
+          end
+
+          # Prevent double-finish of span
+          finish_braintrust_span = lambda do
+            return if stream.instance_variable_get(:@braintrust_span_finished)
+            stream.instance_variable_set(:@braintrust_span_finished, true)
+            extract_stream_metadata.call
+            span.finish
+          end
+
+          # Wrap .each() method - this is the core consumption method
+          original_each = stream.method(:each)
+          stream.define_singleton_method(:each) do |&block|
+            original_each.call(&block)
+          rescue => e
+            span.record_exception(e)
+            span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}")
+            raise
+          ensure
+            finish_braintrust_span.call
+          end
+
+          # Wrap .text() method - returns enumerable for text deltas
+          original_text = stream.method(:text)
+          stream.define_singleton_method(:text) do
+            text_enum = original_text.call
+            # Wrap the returned enumerable's .each method
+            text_enum.define_singleton_method(:each) do |&block|
+              super(&block)
+            rescue => e
+              span.record_exception(e)
+              span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}")
+              raise
+            ensure
+              finish_braintrust_span.call
+            end
+            text_enum
+          end
+
+          stream
+        end
       end
 
       # Prepend the wrapper to the completions resource
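The `current_completion_snapshot` accessor used above comes from the official openai gem's `ChatCompletionStream` helper, which `#stream` returns. A consumption sketch under that assumption; the client setup and the `Braintrust::Trace::OpenAI.wrap` entry point are assumptions by analogy with the RubyLLM integration above, not confirmed by this diff:

```ruby
client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])
Braintrust::Trace::OpenAI.wrap(client)  # assumed wrapper entry point

stream = client.chat.completions.stream(
  model: "gpt-4o-mini",
  messages: [{role: "user", content: "Hello"}]
)

# Consuming via .each or .text finishes the span exactly once;
# finish_braintrust_span guards against double-finish.
stream.text.each { |delta| print delta }
```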
|