RubyGems - swarm_memory - Versions diffs - 2.1.2 → 2.1.3 - Mend

swarm_memory 2.1.2 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64)

checksums.yaml +4 -4
data/lib/claude_swarm/configuration.rb +28 -4
data/lib/claude_swarm/mcp_generator.rb +4 -10
data/lib/claude_swarm/version.rb +1 -1
data/lib/swarm_cli/commands/mcp_serve.rb +2 -2
data/lib/swarm_cli/config_loader.rb +3 -3
data/lib/swarm_cli/version.rb +1 -1
data/lib/swarm_memory/adapters/base.rb +4 -4
data/lib/swarm_memory/core/storage_read_tracker.rb +51 -14
data/lib/swarm_memory/integration/cli_registration.rb +3 -2
data/lib/swarm_memory/integration/sdk_plugin.rb +11 -5
data/lib/swarm_memory/tools/memory_edit.rb +2 -2
data/lib/swarm_memory/tools/memory_multi_edit.rb +2 -2
data/lib/swarm_memory/tools/memory_read.rb +3 -3
data/lib/swarm_memory/version.rb +1 -1
data/lib/swarm_memory.rb +5 -0
data/lib/swarm_sdk/agent/builder.rb +33 -0
data/lib/swarm_sdk/agent/chat/context_tracker.rb +33 -0
data/lib/swarm_sdk/agent/chat/hook_integration.rb +41 -0
data/lib/swarm_sdk/agent/chat/system_reminder_injector.rb +11 -27
data/lib/swarm_sdk/agent/chat.rb +198 -51
data/lib/swarm_sdk/agent/context.rb +6 -2
data/lib/swarm_sdk/agent/context_manager.rb +6 -0
data/lib/swarm_sdk/agent/definition.rb +15 -22
data/lib/swarm_sdk/agent/llm_instrumentation_middleware.rb +180 -0
data/lib/swarm_sdk/configuration.rb +420 -103
data/lib/swarm_sdk/events_to_messages.rb +181 -0
data/lib/swarm_sdk/log_collector.rb +31 -5
data/lib/swarm_sdk/log_stream.rb +37 -8
data/lib/swarm_sdk/model_aliases.json +4 -1
data/lib/swarm_sdk/node/agent_config.rb +33 -8
data/lib/swarm_sdk/node/builder.rb +39 -18
data/lib/swarm_sdk/node_orchestrator.rb +293 -26
data/lib/swarm_sdk/proc_helpers.rb +53 -0
data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +0 -126
data/lib/swarm_sdk/providers/openai_with_responses.rb +22 -15
data/lib/swarm_sdk/restore_result.rb +65 -0
data/lib/swarm_sdk/snapshot.rb +156 -0
data/lib/swarm_sdk/snapshot_from_events.rb +386 -0
data/lib/swarm_sdk/state_restorer.rb +491 -0
data/lib/swarm_sdk/state_snapshot.rb +369 -0
data/lib/swarm_sdk/swarm/agent_initializer.rb +360 -55
data/lib/swarm_sdk/swarm/all_agents_builder.rb +28 -1
data/lib/swarm_sdk/swarm/builder.rb +208 -12
data/lib/swarm_sdk/swarm/swarm_registry_builder.rb +67 -0
data/lib/swarm_sdk/swarm/tool_configurator.rb +46 -11
data/lib/swarm_sdk/swarm.rb +367 -90
data/lib/swarm_sdk/swarm_loader.rb +145 -0
data/lib/swarm_sdk/swarm_registry.rb +136 -0
data/lib/swarm_sdk/tools/delegate.rb +92 -7
data/lib/swarm_sdk/tools/read.rb +17 -5
data/lib/swarm_sdk/tools/scratchpad/scratchpad_list.rb +23 -2
data/lib/swarm_sdk/tools/scratchpad/scratchpad_read.rb +23 -2
data/lib/swarm_sdk/tools/scratchpad/scratchpad_write.rb +21 -4
data/lib/swarm_sdk/tools/stores/read_tracker.rb +47 -12
data/lib/swarm_sdk/tools/stores/scratchpad_storage.rb +45 -0
data/lib/swarm_sdk/tools/stores/storage.rb +4 -4
data/lib/swarm_sdk/tools/think.rb +4 -1
data/lib/swarm_sdk/tools/todo_write.rb +20 -8
data/lib/swarm_sdk/utils.rb +18 -0
data/lib/swarm_sdk/validation_result.rb +33 -0
data/lib/swarm_sdk/version.rb +1 -1
data/lib/swarm_sdk.rb +362 -21
metadata +17 -5

data/lib/swarm_sdk/agent/chat.rb CHANGED

@@ -150,6 +150,7 @@ module SwarmSDK
         raise StateError, "Agent context not set. Call setup_context first." unless @agent_context
         @context_tracker.setup_logging
+        inject_llm_instrumentation
       end
       # Emit model lookup warning if one occurred during initialization
@@ -164,6 +165,8 @@ module SwarmSDK
         LogStream.emit(
           type: "model_lookup_warning",
           agent: agent_name,
+          swarm_id: @agent_context&.swarm_id,
+          parent_swarm_id: @agent_context&.parent_swarm_id,
           model: @model_lookup_error[:model],
           error_message: @model_lookup_error[:error_message],
           suggestions: @model_lookup_error[:suggestions].map { |s| { id: s.id, name: s.name, context_window: s.context_window } },
@@ -221,6 +224,17 @@ module SwarmSDK
         !@active_skill_path.nil?
       end
+      # Clear conversation history
+      #
+      # Removes all messages from the conversation history and clears tool executions.
+      # Used by composable swarms when keep_context: false is specified.
+      #
+      # @return [void]
+      def clear_conversation
+        @messages.clear if @messages.respond_to?(:clear)
+        @context_manager&.clear_ephemeral
+      end
       # Override ask to inject system reminders and periodic TodoWrite reminders
       #
       # Note: This is called BEFORE HookIntegration#ask (due to module include order),
@@ -230,63 +244,72 @@ module SwarmSDK
       # @param options [Hash] Additional options to pass to complete
       # @return [RubyLLM::Message] LLM response
       def ask(prompt, **options)
-        # Check if this is the first user message
-        is_first = SystemReminderInjector.first_message?(self)
-        if is_first
-          # Collect plugin reminders first
-          plugin_reminders = collect_plugin_reminders(prompt, is_first_message: true)
-          # Build full prompt with embedded plugin reminders
-          full_prompt = prompt
-          plugin_reminders.each do |reminder|
-            full_prompt = "#{full_prompt}\n\n#{reminder}"
-          end
-          # Inject first message reminders (includes system reminders + toolset + after)
-          # SystemReminderInjector will embed all reminders in the prompt via add_message
-          SystemReminderInjector.inject_first_message_reminders(self, full_prompt)
+        # Serialize ask() calls to prevent message corruption from concurrent fibers
+        # Uses Async::Semaphore (not Mutex) because SwarmSDK runs in fiber context
+        # This protects against parallel delegation scenarios where multiple delegation
+        # instances call the same underlying primary agent (e.g., tester@frontend and
+        # tester@backend both calling database in parallel).
+        @ask_semaphore ||= Async::Semaphore.new(1)
+        @ask_semaphore.acquire do
+          # Check if this is the first user message
+          is_first = SystemReminderInjector.first_message?(self)
+          if is_first
+            # Collect plugin reminders first
+            plugin_reminders = collect_plugin_reminders(prompt, is_first_message: true)
+            # Build full prompt with embedded plugin reminders
+            full_prompt = prompt
+            plugin_reminders.each do |reminder|
+              full_prompt = "#{full_prompt}\n\n#{reminder}"
+            end
-          # Trigger user_prompt hook manually since we're bypassing the normal ask flow
-          if @hook_executor
-            hook_result = trigger_user_prompt(prompt)
+            # Inject first message reminders (includes system reminders + toolset + after)
+            # SystemReminderInjector will embed all reminders in the prompt via add_message
+            SystemReminderInjector.inject_first_message_reminders(self, full_prompt)
+            # Trigger user_prompt hook manually since we're bypassing the normal ask flow
+            if @hook_executor
+              hook_result = trigger_user_prompt(prompt)
+              # Check if hook halted execution
+              if hook_result[:halted]
+                # Return a halted message instead of calling LLM
+                return RubyLLM::Message.new(
+                  role: :assistant,
+                  content: hook_result[:halt_message],
+                  model_id: model.id,
+                )
+              end
-            # Check if hook halted execution
-            if hook_result[:halted]
-              # Return a halted message instead of calling LLM
-              return RubyLLM::Message.new(
-                role: :assistant,
-                content: hook_result[:halt_message],
-                model_id: model.id,
-              )
+              # NOTE: We ignore modified_prompt for first message since reminders already injected
             end
-            # NOTE: We ignore modified_prompt for first message since reminders already injected
-          end
+            # Call complete to get LLM response
+            complete(**options)
+          else
+            # Build prompt with embedded reminders (if needed)
+            full_prompt = prompt
+            # Add periodic TodoWrite reminder if needed (only if agent has TodoWrite tool)
+            if tools.key?("TodoWrite") && SystemReminderInjector.should_inject_todowrite_reminder?(self, @last_todowrite_message_index)
+              full_prompt = "#{full_prompt}\n\n#{SystemReminderInjector::TODOWRITE_PERIODIC_REMINDER}"
+              # Update tracking
+              @last_todowrite_message_index = SystemReminderInjector.find_last_todowrite_index(self)
+            end
-          # Call complete to get LLM response
-          complete(**options)
-        else
-          # Build prompt with embedded reminders (if needed)
-          full_prompt = prompt
-          # Add periodic TodoWrite reminder if needed
-          if SystemReminderInjector.should_inject_todowrite_reminder?(self, @last_todowrite_message_index)
-            full_prompt = "#{full_prompt}\n\n#{SystemReminderInjector::TODOWRITE_PERIODIC_REMINDER}"
-            # Update tracking
-            @last_todowrite_message_index = SystemReminderInjector.find_last_todowrite_index(self)
-          end
+            # Collect plugin reminders and embed them
+            plugin_reminders = collect_plugin_reminders(full_prompt, is_first_message: false)
+            plugin_reminders.each do |reminder|
+              full_prompt = "#{full_prompt}\n\n#{reminder}"
+            end
-          # Collect plugin reminders and embed them
-          plugin_reminders = collect_plugin_reminders(full_prompt, is_first_message: false)
-          plugin_reminders.each do |reminder|
-            full_prompt = "#{full_prompt}\n\n#{reminder}"
+            # Normal ask behavior for subsequent messages
+            # This calls super which goes to HookIntegration's ask override
+            # HookIntegration will call add_message, and we'll extract reminders there
+            super(full_prompt, **options)
           end
-          # Normal ask behavior for subsequent messages
-          # This calls super which goes to HookIntegration's ask override
-          # HookIntegration will call add_message, and we'll extract reminders there
-          super(full_prompt, **options)
         end
       end
@@ -674,7 +697,15 @@ module SwarmSDK
       # This is needed for setting agent_name and other provider-specific settings.
       #
       # @return [RubyLLM::Provider::Base] Provider instance
-      attr_reader :provider, :global_semaphore, :local_semaphore, :real_model_info, :context_tracker, :context_manager
+      attr_reader :provider, :global_semaphore, :local_semaphore, :real_model_info, :context_tracker, :context_manager, :agent_context, :last_todowrite_message_index, :active_skill_path
+      # Setters for snapshot/restore
+      attr_writer :last_todowrite_message_index, :active_skill_path
+      # Expose messages array (inherited from RubyLLM::Chat but not publicly accessible)
+      #
+      # @return [Array<RubyLLM::Message>] Conversation messages
+      attr_reader :messages
       # Get context window limit for the current model
       #
@@ -718,6 +749,37 @@ module SwarmSDK
         messages.select { |msg| msg.role == :assistant }.sum { |msg| msg.output_tokens || 0 }
       end
+      # Calculate cumulative cached tokens across all assistant messages
+      #
+      # Cached tokens are portions of prompts served from the provider's cache.
+      # OpenAI reports this automatically for prompts >1024 tokens.
+      # Anthropic/Bedrock expose cache control via Content::Raw blocks.
+      #
+      # @return [Integer] Total cached tokens used in conversation
+      def cumulative_cached_tokens
+        messages.select { |msg| msg.role == :assistant }.sum { |msg| msg.cached_tokens || 0 }
+      end
+      # Calculate cumulative cache creation tokens
+      #
+      # Cache creation tokens are written to the cache (Anthropic/Bedrock only).
+      # These are charged at the normal input rate when first created.
+      #
+      # @return [Integer] Total tokens written to cache
+      def cumulative_cache_creation_tokens
+        messages.select { |msg| msg.role == :assistant }.sum { |msg| msg.cache_creation_tokens || 0 }
+      end
+      # Calculate effective input tokens (excluding cache hits)
+      #
+      # This represents the actual tokens charged for input, excluding cached portions.
+      # Useful for accurate cost tracking when using prompt caching.
+      #
+      # @return [Integer] Actual input tokens charged (input minus cached)
+      def effective_input_tokens
+        cumulative_input_tokens - cumulative_cached_tokens
+      end
       # Calculate total tokens used (input + output)
       #
       # @return [Integer] Total tokens used in conversation
@@ -777,6 +839,85 @@ module SwarmSDK
       private
+      # Inject LLM instrumentation middleware for API request/response logging
+      #
+      # This middleware captures HTTP requests/responses to LLM providers and
+      # emits structured events via LogStream. Only injected when logging is enabled.
+      #
+      # @return [void]
+      def inject_llm_instrumentation
+        # Safety checks
+        return unless @provider
+        faraday_conn = @provider.connection&.connection
+        return unless faraday_conn
+        # Check if middleware is already present to prevent duplicates
+        return if @llm_instrumentation_injected
+        # Get provider name for logging
+        provider_name = @provider.class.name.split("::").last.downcase
+        # Inject middleware at beginning of stack (position 0)
+        # This ensures we capture raw requests before any transformations
+        # Use fully qualified name to ensure Zeitwerk loads it
+        faraday_conn.builder.insert(
+          0,
+          SwarmSDK::Agent::LLMInstrumentationMiddleware,
+          on_request: method(:handle_llm_api_request),
+          on_response: method(:handle_llm_api_response),
+          provider_name: provider_name,
+        )
+        # Mark as injected to prevent duplicates
+        @llm_instrumentation_injected = true
+        RubyLLM.logger.debug("SwarmSDK: Injected LLM instrumentation middleware for agent #{@agent_name}")
+      rescue StandardError => e
+        # Don't fail initialization if instrumentation fails
+        RubyLLM.logger.error("SwarmSDK: Failed to inject LLM instrumentation: #{e.message}")
+      end
+      # Handle LLM API request event
+      #
+      # Emits llm_api_request event via LogStream with request details.
+      #
+      # @param data [Hash] Request data from middleware
+      # @return [void]
+      def handle_llm_api_request(data)
+        return unless LogStream.emitter
+        LogStream.emit(
+          type: "llm_api_request",
+          agent: @agent_name,
+          swarm_id: @agent_context&.swarm_id,
+          parent_swarm_id: @agent_context&.parent_swarm_id,
+          **data,
+        )
+      rescue StandardError => e
+        RubyLLM.logger.error("SwarmSDK: Error emitting llm_api_request event: #{e.message}")
+      end
+      # Handle LLM API response event
+      #
+      # Emits llm_api_response event via LogStream with response details.
+      #
+      # @param data [Hash] Response data from middleware
+      # @return [void]
+      def handle_llm_api_response(data)
+        return unless LogStream.emitter
+        LogStream.emit(
+          type: "llm_api_response",
+          agent: @agent_name,
+          swarm_id: @agent_context&.swarm_id,
+          parent_swarm_id: @agent_context&.parent_swarm_id,
+          **data,
+        )
+      rescue StandardError => e
+        RubyLLM.logger.error("SwarmSDK: Error emitting llm_api_response event: #{e.message}")
+      end
       # Call LLM with retry logic for transient failures
       #
       # Retries up to 10 times with fixed 10-second delays for:
@@ -802,10 +943,13 @@ module SwarmSDK
               LogStream.emit(
                 type: "llm_retry_exhausted",
                 agent: @agent_name,
+                swarm_id: @agent_context&.swarm_id,
+                parent_swarm_id: @agent_context&.parent_swarm_id,
                 model: @model&.id,
                 attempts: attempts,
                 error_class: e.class.name,
                 error_message: e.message,
+                error_backtrace: e.backtrace,
               )
               raise
             end
@@ -814,11 +958,14 @@ module SwarmSDK
             LogStream.emit(
               type: "llm_retry_attempt",
               agent: @agent_name,
+              swarm_id: @agent_context&.swarm_id,
+              parent_swarm_id: @agent_context&.parent_swarm_id,
               model: @model&.id,
               attempt: attempts,
               max_retries: max_retries,
               error_class: e.class.name,
               error_message: e.message,
+              error_backtrace: e.backtrace,
               retry_delay: delay,
             )

data/lib/swarm_sdk/agent/context.rb CHANGED

@@ -33,15 +33,19 @@ module SwarmSDK
       # Threshold at which automatic compression is triggered
       COMPRESSION_THRESHOLD = 60
-      attr_reader :name, :delegation_tools, :metadata, :warning_thresholds_hit
+      attr_reader :name, :delegation_tools, :metadata, :warning_thresholds_hit, :swarm_id, :parent_swarm_id
       # Initialize a new agent context
       #
       # @param name [Symbol, String] Agent name
+      # @param swarm_id [String] Swarm ID for event tracking
+      # @param parent_swarm_id [String, nil] Parent swarm ID (nil for root swarms)
       # @param delegation_tools [Array<String>] Names of tools that are delegations
       # @param metadata [Hash] Optional metadata about the agent
-      def initialize(name:, delegation_tools: [], metadata: {})
+      def initialize(name:, swarm_id:, parent_swarm_id: nil, delegation_tools: [], metadata: {})
         @name = name.to_sym
+        @swarm_id = swarm_id
+        @parent_swarm_id = parent_swarm_id
         @delegation_tools = Set.new(delegation_tools.map(&:to_s))
         @metadata = metadata
         @delegation_call_ids = Set.new

data/lib/swarm_sdk/agent/context_manager.rb CHANGED

@@ -18,10 +18,16 @@ module SwarmSDK
     class ContextManager
       SYSTEM_REMINDER_REGEX = %r{<system-reminder>.*?</system-reminder>}m
+      # Expose compression state for snapshot/restore
+      # NOTE: @compression_applied initializes to nil (not false), only set to true when compression runs
+      attr_reader :compression_applied
+      attr_writer :compression_applied
       def initialize
         # Ephemeral content to append to messages for this turn only
         # Format: { message_index => [array of reminder strings] }
         @ephemeral_content = {}
+        # NOTE: @compression_applied is NOT initialized here - starts as nil
       end
       # Track ephemeral content to append to a specific message

data/lib/swarm_sdk/agent/definition.rb CHANGED

@@ -44,13 +44,21 @@ module SwarmSDK
         :agent_permissions,
         :assume_model_exists,
         :hooks,
-        :memory
+        :memory,
+        :shared_across_delegations
       attr_accessor :bypass_permissions, :max_concurrent_tools
       def initialize(name, config = {})
         @name = name.to_sym
+        # Validate name doesn't contain '@' (reserved for delegation instances)
+        if @name.to_s.include?("@")
+          raise ConfigurationError,
+            "Agent names cannot contain '@' character (reserved for delegation instance naming). " \
+              "Agent: #{@name}"
+        end
         # BREAKING CHANGE: Hard error for plural form
         if config[:directories]
           raise ConfigurationError,
@@ -96,6 +104,9 @@ module SwarmSDK
         # (memory prompt needs to be appended if memory is enabled)
         @memory = parse_memory_config(config[:memory])
+        # Delegation isolation mode (default: false = isolated instances per delegation)
+        @shared_across_delegations = config[:shared_across_delegations] || false
         # Build system prompt after directory and memory are set
         @system_prompt = build_full_system_prompt(config[:system_prompt])
@@ -111,7 +122,7 @@ module SwarmSDK
         # Inject default write restrictions for security
         @tools = inject_default_write_permissions(@tools)
-        @delegates_to = Array(config[:delegates_to] || []).map(&:to_sym)
+        @delegates_to = Array(config[:delegates_to] || []).map(&:to_sym).uniq
         @mcp_servers = Array(config[:mcp_servers] || [])
         # Parse hooks configuration
@@ -181,6 +192,7 @@ module SwarmSDK
           assume_model_exists: @assume_model_exists,
           max_concurrent_tools: @max_concurrent_tools,
           hooks: @hooks,
+          shared_across_delegations: @shared_across_delegations,
           # Permissions are core SDK functionality (not plugin-specific)
           default_permissions: @default_permissions,
           permissions: @agent_permissions,
@@ -358,7 +370,7 @@ module SwarmSDK
       def render_non_coding_base_prompt
         # Simplified base prompt for non-coding agents
-        # Includes environment info, TODO, and Scratchpad tool information
+        # Includes environment info only
         # Does not steer towards coding tasks
         cwd = @directory || Dir.pwd
         platform = RUBY_PLATFORM
@@ -383,25 +395,6 @@ module SwarmSDK
           Platform: #{platform}
           OS Version: #{os_version}
           </env>
-          # Task Management
-          You have access to the TodoWrite tool to help you manage and plan tasks. Use this tool to track your progress and give visibility into your work.
-          When working on multi-step tasks:
-          1. Create a todo list with all known tasks before starting work
-          2. Mark each task as in_progress when you start it
-          3. Mark each task as completed IMMEDIATELY after finishing it
-          4. Complete ALL pending todos before finishing your response
-          # Scratchpad Storage
-          You have access to Scratchpad tools for storing and retrieving information:
-          - **ScratchpadWrite**: Store detailed outputs, analysis, or results that are too long for direct responses
-          - **ScratchpadRead**: Retrieve previously stored content
-          - **ScratchpadList**: List available scratchpad entries
-          Use the scratchpad to share information that would otherwise clutter your responses.
         PROMPT
       end

data/lib/swarm_sdk/agent/llm_instrumentation_middleware.rb ADDED

@@ -0,0 +1,180 @@
+# frozen_string_literal: true
+module SwarmSDK
+  module Agent
+    # Faraday middleware for capturing LLM API requests and responses
+    #
+    # This middleware intercepts HTTP calls to LLM providers and emits
+    # structured events via LogStream for logging and monitoring.
+    #
+    # Events emitted:
+    # - llm_api_request: Before sending request to LLM API
+    # - llm_api_response: After receiving response from LLM API
+    #
+    # The middleware is injected at runtime into the provider's Faraday
+    # connection stack (see Agent::Chat#inject_llm_instrumentation).
+    class LLMInstrumentationMiddleware < Faraday::Middleware
+      # Initialize middleware
+      #
+      # @param app [Faraday::Connection] Faraday app
+      # @param on_request [Proc] Callback for request events
+      # @param on_response [Proc] Callback for response events
+      # @param provider_name [String] Provider name for logging
+      def initialize(app, on_request:, on_response:, provider_name:)
+        super(app)
+        @on_request = on_request
+        @on_response = on_response
+        @provider_name = provider_name
+      end
+      # Intercept HTTP call
+      #
+      # @param env [Faraday::Env] Request environment
+      # @return [Faraday::Response] HTTP response
+      def call(env)
+        start_time = Time.now
+        # Emit request event
+        emit_request_event(env, start_time)
+        # Execute request
+        @app.call(env).on_complete do |response_env|
+          end_time = Time.now
+          duration = end_time - start_time
+          # Emit response event
+          emit_response_event(response_env, start_time, end_time, duration)
+        end
+      end
+      private
+      # Emit request event
+      #
+      # @param env [Faraday::Env] Request environment
+      # @param timestamp [Time] Request timestamp
+      # @return [void]
+      def emit_request_event(env, timestamp)
+        request_data = {
+          provider: @provider_name,
+          body: parse_body(env.body),
+          timestamp: timestamp.utc.iso8601,
+        }
+        @on_request.call(request_data)
+      rescue StandardError => e
+        # Don't let logging errors break the request
+        RubyLLM.logger.error("LLM instrumentation request error: #{e.message}")
+      end
+      # Emit response event
+      #
+      # @param env [Faraday::Env] Response environment
+      # @param start_time [Time] Request start time
+      # @param end_time [Time] Request end time
+      # @param duration [Float] Request duration in seconds
+      # @return [void]
+      def emit_response_event(env, start_time, end_time, duration)
+        response_data = {
+          provider: @provider_name,
+          body: parse_body(env.body),
+          duration_seconds: duration.round(3),
+          timestamp: end_time.utc.iso8601,
+        }
+        # Extract usage information from response body if available
+        if env.body.is_a?(String) && !env.body.empty?
+          begin
+            parsed = JSON.parse(env.body)
+            response_data[:usage] = extract_usage(parsed) if parsed.is_a?(Hash)
+            response_data[:model] = parsed["model"] if parsed.is_a?(Hash)
+            response_data[:finish_reason] = extract_finish_reason(parsed) if parsed.is_a?(Hash)
+          rescue JSON::ParserError
+            # Not JSON, skip usage extraction
+          end
+        end
+        @on_response.call(response_data)
+      rescue StandardError => e
+        # Don't let logging errors break the response
+        RubyLLM.logger.error("LLM instrumentation response error: #{e.message}")
+      end
+      # Sanitize headers by removing sensitive data
+      #
+      # @param headers [Hash] HTTP headers
+      # @return [Hash] Sanitized headers
+      def sanitize_headers(headers)
+        return {} unless headers
+        headers.transform_keys(&:to_s).transform_values do |value|
+          # Redact authorization headers
+          if value.to_s.match?(/bearer|token|key/i)
+            "[REDACTED]"
+          else
+            value.to_s
+          end
+        end
+      rescue StandardError
+        {}
+      end
+      # Parse request/response body
+      #
+      # @param body [String, Hash, nil] HTTP body
+      # @return [Hash, String, nil] Parsed body
+      def parse_body(body)
+        return if body.nil? || body == ""
+        # Already parsed
+        return body if body.is_a?(Hash)
+        # Try to parse JSON
+        JSON.parse(body)
+      rescue JSON::ParserError
+        # Return truncated string if not JSON
+        body.to_s[0..1000]
+      rescue StandardError
+        nil
+      end
+      # Extract usage statistics from response
+      #
+      # Handles different provider formats (OpenAI, Anthropic, etc.)
+      #
+      # @param parsed [Hash] Parsed response body
+      # @return [Hash, nil] Usage statistics
+      def extract_usage(parsed)
+        usage = parsed["usage"] || parsed.dig("usage")
+        return unless usage
+        {
+          input_tokens: usage["input_tokens"] || usage["prompt_tokens"],
+          output_tokens: usage["output_tokens"] || usage["completion_tokens"],
+          total_tokens: usage["total_tokens"],
+        }.compact
+      rescue StandardError
+        nil
+      end
+      # Extract finish reason from response
+      #
+      # Handles different provider formats
+      #
+      # @param parsed [Hash] Parsed response body
+      # @return [String, nil] Finish reason
+      def extract_finish_reason(parsed)
+        # Anthropic format
+        return parsed["stop_reason"] if parsed["stop_reason"]
+        # OpenAI format
+        choices = parsed["choices"]
+        return unless choices&.is_a?(Array) && !choices.empty?
+        choices.first["finish_reason"]
+      rescue StandardError
+        nil
+      end
+    end
+  end
+end