RubyGems - llm_cost_tracker - Versions diffs - 0.8.0 → 0.9.0 - Mend

llm_cost_tracker 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (125)

data/lib/llm_cost_tracker/integrations/openai.rb CHANGED Viewed

@@ -6,7 +6,7 @@ require_relative "../parsers/openai_service_charges"
 module LlmCostTracker
   module Integrations
-    module Openai
+    module Openai # rubocop:disable Metrics/ModuleLength
       extend Base
       class << self
@@ -14,8 +14,30 @@ module LlmCostTracker
           :openai
         end
-        def stream_pricing_mode(request)
-          Pricing.normalize_mode((request || {})[:service_tier])
+        def stream_pricing_mode(request, host: nil)
+          LlmCostTracker::Parsers::OpenaiUsage.combined_pricing_mode(
+            host: host,
+            model: (request || {})[:model],
+            service_tier: (request || {})[:service_tier]
+          )
+        end
+        def stream_collector(request, host: nil)
+          LlmCostTracker::Capture::StreamCollector.new(
+            provider: integration_name.to_s,
+            model: request[:model],
+            pricing_mode: stream_pricing_mode(request, host: host),
+            request: request
+          )
+        end
+        def client_host_for(resource)
+          client = resource.instance_variable_get(:@client)
+          return nil unless client.respond_to?(:base_url, true)
+          URI.parse(client.send(:base_url).to_s).host
+        rescue URI::InvalidURIError
+          nil
         end
         def minimum_version
@@ -28,20 +50,40 @@ module LlmCostTracker
         def patch_targets
           [
-            patch_target(
-              "OpenAI::Resources::Responses",
-              with: ResponsesPatch,
-              methods: %i[create stream stream_raw retrieve_streaming]
-            ),
-            patch_target(
-              "OpenAI::Resources::Chat::Completions",
-              with: ChatCompletionsPatch,
-              methods: %i[create stream_raw]
-            )
+            patch_target("OpenAI::Resources::Responses",
+                         with: ResponsesPatch, methods: %i[create stream stream_raw retrieve_streaming]),
+            patch_target("OpenAI::Resources::Chat::Completions",
+                         with: ChatCompletionsPatch, methods: %i[create stream stream_raw]),
+            *auxiliary_patch_targets
+          ]
+        end
+        def auxiliary_patch_targets
+          [
+            patch_target("OpenAI::Resources::Embeddings",
+                         with: EmbeddingsPatch, methods: %i[create], optional: true),
+            patch_target("OpenAI::Resources::Images",
+                         with: ImagesPatch, methods: %i[generate edit create_variation], optional: true),
+            patch_target("OpenAI::Resources::Images",
+                         with: StreamingImagesPatch,
+                         methods: %i[generate_stream_raw edit_stream_raw],
+                         optional: true, skip_when_methods_missing: true),
+            patch_target("OpenAI::Resources::Audio::Transcriptions",
+                         with: TranscriptionsPatch, methods: %i[create], optional: true),
+            patch_target("OpenAI::Resources::Audio::Transcriptions",
+                         with: StreamingTranscriptionsPatch,
+                         methods: %i[create_streaming],
+                         optional: true, skip_when_methods_missing: true),
+            patch_target("OpenAI::Resources::Audio::Translations",
+                         with: TranslationsPatch, methods: %i[create], optional: true),
+            patch_target("OpenAI::Resources::Audio::Speech",
+                         with: SpeechPatch, methods: %i[create], optional: true),
+            patch_target("OpenAI::Resources::Moderations",
+                         with: ModerationsPatch, methods: %i[create], optional: true)
           ]
         end
-        def record_response(response, request:, latency_ms:)
+        def record_response(response, request:, latency_ms:, host: nil)
           return unless active?
           record_safely do
@@ -53,27 +95,141 @@ module LlmCostTracker
             next if input_tokens.nil? && output_tokens.nil?
             cache_read = cache_read_input_tokens(usage)
+            model = object_value(response, :model) || request[:model]
             LlmCostTracker::Tracker.record(
               capture: UsageCapture.build(
                 provider: "openai",
-                model: object_value(response, :model) || request[:model],
-                pricing_mode: object_value(response, :service_tier) || request[:service_tier],
-                token_usage: token_usage(usage:, input_tokens:, output_tokens:, cache_read:),
+                model: model,
+                pricing_mode: LlmCostTracker::Parsers::OpenaiUsage.combined_pricing_mode(
+                  host: host,
+                  model: model,
+                  service_tier: object_value(response, :service_tier) || request[:service_tier]
+                ),
+                token_usage: token_usage(usage:, input_tokens:, output_tokens:, cache_read:, model: model),
                 usage_source: :sdk_response,
                 provider_response_id: object_value(response, :id),
-                service_line_items: service_line_items_from(response)
+                service_line_items: service_line_items_from(response, request: request)
               ),
               latency_ms: latency_ms
             )
           end
         end
-        def service_line_items_from(response)
+        def record_image(response, request:, latency_ms:)
+          usage = object_value(response, :usage)
+          raw_input = usage ? object_value(usage, :input_tokens).to_i : 0
+          raw_output = usage ? object_value(usage, :output_tokens).to_i : 0
+          image_input = image_input_tokens(usage).to_i
+          cache_read = cache_read_input_tokens(usage).to_i
+          text_input = [raw_input - image_input - cache_read, 0].max
+          image_output, text_output = split_image_output(usage, raw_output)
+          record_passthrough(
+            model: request[:model],
+            response: response,
+            latency_ms: latency_ms,
+            input_tokens: text_input,
+            image_input_tokens: image_input,
+            output_tokens: text_output,
+            image_output_tokens: image_output,
+            cache_read_input_tokens: cache_read
+          )
+        end
+        def split_image_output(usage, raw_output)
+          image_tokens = image_output_tokens(usage).to_i
+          text_tokens = text_output_tokens(usage).to_i
+          return [raw_output, 0] if image_tokens.zero? && text_tokens.zero?
+          text_tokens = [raw_output - image_tokens, 0].max if text_tokens.zero?
+          [image_tokens, text_tokens]
+        end
+        def record_transcription(response, request:, latency_ms:)
+          record_passthrough(
+            model: request[:model],
+            response: response,
+            latency_ms: latency_ms,
+            **transcription_token_attributes(object_value(response, :usage))
+          )
+        end
+        def transcription_token_attributes(usage)
+          return { input_tokens: 0, output_tokens: 0 } unless usage && object_value(usage, :type).to_s == "tokens"
+          raw_input = object_value(usage, :input_tokens).to_i
+          audio_input = object_dig(usage, :input_token_details, :audio_tokens).to_i
+          {
+            input_tokens: [raw_input - audio_input, 0].max,
+            audio_input_tokens: audio_input,
+            output_tokens: object_value(usage, :output_tokens).to_i
+          }
+        end
+        def record_speech(_response, request:, latency_ms:)
+          record_passthrough(
+            model: request[:model],
+            response: nil,
+            latency_ms: latency_ms,
+            input_tokens: 0,
+            output_tokens: 0,
+            service_line_items: speech_line_items(request)
+          )
+        end
+        CHARACTER_BILLED_TTS_MODELS = /\Atts-1(-hd)?\z/
+        private_constant :CHARACTER_BILLED_TTS_MODELS
+        def speech_line_items(request)
+          input = request[:input]
+          return [] unless input.is_a?(String)
+          return [] unless CHARACTER_BILLED_TTS_MODELS.match?(request[:model].to_s)
+          [LlmCostTracker::Billing::LineItem.build(
+            component_key: :text_to_speech_character,
+            quantity: input.length,
+            cost_status: LlmCostTracker::Billing::CostStatus::UNKNOWN,
+            pricing_basis: :provider_usage,
+            provider_field: "request.input"
+          )]
+        end
+        def record_moderation(response, request:, latency_ms:)
+          record_passthrough(
+            model: object_value(response, :model) || request[:model],
+            response: response,
+            latency_ms: latency_ms,
+            input_tokens: 0,
+            output_tokens: 0
+          )
+        end
+        def record_passthrough(model:, response:, latency_ms:, service_line_items: [], **token_attributes)
+          return unless active?
+          record_safely do
+            LlmCostTracker::Tracker.record(
+              capture: UsageCapture.build(
+                provider: "openai",
+                model: model,
+                token_usage: TokenUsage.build(**token_attributes),
+                usage_source: :sdk_response,
+                provider_response_id: response && object_value(response, :id),
+                service_line_items: service_line_items
+              ),
+              latency_ms: latency_ms
+            )
+          end
+        end
+        def service_line_items_from(response, request: nil)
           output = object_value(response, :output)
           return [] unless output.respond_to?(:each)
-          LlmCostTracker::Parsers::OpenaiServiceCharges
-            .line_items_from_output(output.map { |item| normalize_output_item(item) })
+          LlmCostTracker::Parsers::OpenaiServiceCharges.line_items_from_output(
+            output.map { |item| normalize_output_item(item) },
+            request: request,
+            model: object_value(response, :model) || request&.dig(:model)
+          )
         end
         def normalize_output_item(item)
@@ -81,7 +237,7 @@ module LlmCostTracker
           return nil if item.nil?
           {
-            "type" => object_value(item, :type),
+            "type" => object_value(item, :type)&.to_s,
             "id" => object_value(item, :id),
             "status" => object_value(item, :status),
             "container_id" => object_value(item, :container_id),
@@ -93,19 +249,34 @@ module LlmCostTracker
           return nil if action.nil?
           return action if action.is_a?(Hash)
-          { "type" => object_value(action, :type) }
+          { "type" => object_value(action, :type)&.to_s }
         end
-        def token_usage(usage:, input_tokens:, output_tokens:, cache_read:)
+        IMAGE_OUTPUT_MODEL_PATTERN = /\Agpt-image-/i
+        private_constant :IMAGE_OUTPUT_MODEL_PATTERN
+        def token_usage(usage:, input_tokens:, output_tokens:, cache_read:, model: nil)
           audio_input = audio_input_tokens(usage)
           audio_output = audio_output_tokens(usage)
+          image_input = image_input_tokens(usage)
+          image_output_details = image_output_tokens(usage)
+          text_output_details = text_output_tokens(usage)
+          image_output, regular_output = split_responses_image_output(
+            output_tokens: output_tokens.to_i,
+            image_output_details: image_output_details,
+            text_output_details: text_output_details,
+            audio_output: audio_output,
+            default_to_image: model.to_s.match?(IMAGE_OUTPUT_MODEL_PATTERN)
+          )
           TokenUsage.build(
-            input_tokens: regular_input_tokens(input_tokens, cache_read, audio_input),
-            output_tokens: regular_output_tokens(output_tokens, audio_output),
+            input_tokens: regular_input_tokens(input_tokens, cache_read, audio_input, image_input),
+            output_tokens: regular_output,
             cache_read_input_tokens: cache_read,
             audio_input_tokens: audio_input,
             audio_output_tokens: audio_output,
+            image_input_tokens: image_input,
+            image_output_tokens: image_output,
             hidden_output_tokens: hidden_output_tokens(usage)
           )
         end
@@ -113,44 +284,36 @@ module LlmCostTracker
         INPUT_DETAIL_KEYS = %i[input_tokens_details input_token_details prompt_tokens_details].freeze
         OUTPUT_DETAIL_KEYS = %i[output_tokens_details output_token_details completion_tokens_details].freeze
-        def cache_read_input_tokens(usage)
-          input_detail(usage, :cached_tokens)
-        end
-        def hidden_output_tokens(usage)
-          output_detail(usage, :reasoning_tokens)
-        end
-        def audio_input_tokens(usage)
-          input_detail(usage, :audio_tokens)
-        end
-        def audio_output_tokens(usage)
-          output_detail(usage, :audio_tokens)
-        end
+        def cache_read_input_tokens(usage) = detail(usage, INPUT_DETAIL_KEYS, :cached_tokens)
+        def hidden_output_tokens(usage)    = detail(usage, OUTPUT_DETAIL_KEYS, :reasoning_tokens)
+        def audio_input_tokens(usage)      = detail(usage, INPUT_DETAIL_KEYS, :audio_tokens)
+        def audio_output_tokens(usage)     = detail(usage, OUTPUT_DETAIL_KEYS, :audio_tokens)
+        def image_input_tokens(usage)      = detail(usage, INPUT_DETAIL_KEYS, :image_tokens)
+        def image_output_tokens(usage)     = detail(usage, OUTPUT_DETAIL_KEYS, :image_tokens)
+        def text_output_tokens(usage)      = detail(usage, OUTPUT_DETAIL_KEYS, :text_tokens)
-        def input_detail(usage, key)
-          INPUT_DETAIL_KEYS.each do |container|
+        def detail(usage, containers, key)
+          containers.each do |container|
             value = object_dig(usage, container, key)
             return value.to_i if value
           end
           0
         end
-        def output_detail(usage, key)
-          OUTPUT_DETAIL_KEYS.each do |container|
-            value = object_dig(usage, container, key)
-            return value.to_i if value
-          end
-          0
+        def regular_input_tokens(input_tokens, cache_read, audio_input, image_input)
+          [input_tokens.to_i - cache_read - audio_input - image_input, 0].max
         end
-        def regular_input_tokens(input_tokens, cache_read, audio_input)
-          [input_tokens.to_i - cache_read - audio_input, 0].max
-        end
+        def split_responses_image_output(output_tokens:, image_output_details:, text_output_details:, audio_output:,
+                                         default_to_image: false)
+          if image_output_details.zero? && text_output_details.zero?
+            remainder = [output_tokens - audio_output, 0].max
+            return default_to_image ? [remainder, 0] : [0, remainder]
+          end
-        def regular_output_tokens(output_tokens, audio_output)
-          [output_tokens.to_i - audio_output, 0].max
+          text_output = text_output_details
+          text_output = [output_tokens - image_output_details - audio_output, 0].max if text_output.zero?
+          [image_output_details, text_output]
         end
       end
@@ -158,11 +321,12 @@ module LlmCostTracker
         def create(*args, **kwargs)
           LlmCostTracker::Integrations::Openai.enforce_budget!
           started_at = LlmCostTracker::Timing.now_monotonic
-          response = super
+          response = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
           LlmCostTracker::Integrations::Openai.record_response(
             response,
             request: LlmCostTracker::Integrations::Openai.request_params(args, kwargs),
-            latency_ms: LlmCostTracker::Integrations::Openai.elapsed_ms(started_at)
+            latency_ms: LlmCostTracker::Integrations::Openai.elapsed_ms(started_at),
+            host: LlmCostTracker::Integrations::Openai.client_host_for(self)
           )
           response
         end
@@ -170,25 +334,28 @@ module LlmCostTracker
         def stream(*args, **kwargs)
           request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
           LlmCostTracker::Integrations::Openai.enforce_budget!
-          collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
-          stream = super
+          host = LlmCostTracker::Integrations::Openai.client_host_for(self)
+          collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
+          stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
           LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
         end
         def stream_raw(*args, **kwargs)
           request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
           LlmCostTracker::Integrations::Openai.enforce_budget!
-          collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
-          stream = super
+          host = LlmCostTracker::Integrations::Openai.client_host_for(self)
+          collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
+          stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
           LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
         end
         def retrieve_streaming(response_id, *args, **kwargs)
           request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
           LlmCostTracker::Integrations::Openai.enforce_budget!
-          collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
+          host = LlmCostTracker::Integrations::Openai.client_host_for(self)
+          collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
           collector.provider_response_id = response_id
-          stream = super
+          stream = super(response_id, *LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
           LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
         end
       end
@@ -197,20 +364,87 @@ module LlmCostTracker
         def create(*args, **kwargs)
           LlmCostTracker::Integrations::Openai.enforce_budget!
           started_at = LlmCostTracker::Timing.now_monotonic
-          response = super
+          response = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
           LlmCostTracker::Integrations::Openai.record_response(
             response,
             request: LlmCostTracker::Integrations::Openai.request_params(args, kwargs),
-            latency_ms: LlmCostTracker::Integrations::Openai.elapsed_ms(started_at)
+            latency_ms: LlmCostTracker::Integrations::Openai.elapsed_ms(started_at),
+            host: LlmCostTracker::Integrations::Openai.client_host_for(self)
           )
           response
         end
+        def stream(*args, **kwargs)
+          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
+          LlmCostTracker::Integrations::Openai.enforce_budget!
+          host = LlmCostTracker::Integrations::Openai.client_host_for(self)
+          collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
+          stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
+          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+        end
         def stream_raw(*args, **kwargs)
           request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
           LlmCostTracker::Integrations::Openai.enforce_budget!
-          collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
-          stream = super
+          host = LlmCostTracker::Integrations::Openai.client_host_for(self)
+          collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
+          stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
+          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+        end
+      end
+      module PatchBuilder
+        module_function
+        def build(record_method:, methods:)
+          Module.new.tap do |mod|
+            methods.each { |method_name| define_wrapped_method(mod, method_name, record_method) }
+          end
+        end
+        def define_wrapped_method(mod, method_name, record_method)
+          mod.define_method(method_name) do |*args, **kwargs, &block|
+            integration = LlmCostTracker::Integrations::Openai
+            integration.enforce_budget!
+            started_at = LlmCostTracker::Timing.now_monotonic
+            response = super(*integration.normalize_sdk_args(args, kwargs), &block)
+            integration.public_send(
+              record_method, response,
+              request: integration.request_params(args, kwargs),
+              latency_ms: integration.elapsed_ms(started_at)
+            )
+            response
+          end
+        end
+      end
+      EmbeddingsPatch = PatchBuilder.build(record_method: :record_response, methods: %i[create])
+      ImagesPatch = PatchBuilder.build(record_method: :record_image, methods: %i[generate edit create_variation])
+      TranscriptionsPatch = PatchBuilder.build(record_method: :record_transcription, methods: %i[create])
+      TranslationsPatch = PatchBuilder.build(record_method: :record_transcription, methods: %i[create])
+      SpeechPatch = PatchBuilder.build(record_method: :record_speech, methods: %i[create])
+      ModerationsPatch = PatchBuilder.build(record_method: :record_moderation, methods: %i[create])
+      module StreamingImagesPatch
+        %i[generate_stream_raw edit_stream_raw].each do |method_name|
+          define_method(method_name) do |*args, **kwargs|
+            request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
+            LlmCostTracker::Integrations::Openai.enforce_budget!
+            host = LlmCostTracker::Integrations::Openai.client_host_for(self)
+            collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
+            stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
+            LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+          end
+        end
+      end
+      module StreamingTranscriptionsPatch
+        def create_streaming(*args, **kwargs)
+          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
+          LlmCostTracker::Integrations::Openai.enforce_budget!
+          host = LlmCostTracker::Integrations::Openai.client_host_for(self)
+          collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
+          stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
           LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
         end
       end

data/lib/llm_cost_tracker/integrations/ruby_llm.rb CHANGED Viewed

@@ -25,7 +25,7 @@ module LlmCostTracker
             patch_target(
               "RubyLLM::Provider",
               with: ProviderPatch,
-              methods: %i[slug complete embed transcribe]
+              methods: %i[slug complete embed transcribe paint moderate]
             )
           ]
         end
@@ -65,6 +65,69 @@ module LlmCostTracker
           )
         end
+        def record_image(provider, response, request:, latency_ms:)
+          usage = object_value(response, :usage)
+          usage = {} unless usage.is_a?(Hash)
+          raw_input = (usage[:input_tokens] || usage["input_tokens"]).to_i
+          raw_output = (usage[:output_tokens] || usage["output_tokens"]).to_i
+          image_input = image_token_detail(usage, :input)
+          image_output = image_token_detail(usage, :output)
+          text_input = [raw_input - image_input, 0].max
+          text_output = [raw_output - image_output, 0].max
+          record_passthrough(
+            provider: provider_slug(provider),
+            model: response_model_id(response) || model_id(request[:model]),
+            response: response,
+            latency_ms: latency_ms,
+            input_tokens: text_input,
+            image_input_tokens: image_input,
+            output_tokens: text_output,
+            image_output_tokens: image_output
+          )
+        end
+        def record_moderation(provider, response, request:, latency_ms:)
+          record_passthrough(
+            provider: provider_slug(provider),
+            model: response_model_id(response) || model_id(request[:model]),
+            response: response,
+            latency_ms: latency_ms,
+            input_tokens: 0,
+            output_tokens: 0
+          )
+        end
+        def image_token_detail(usage, direction)
+          container_key = direction == :input ? :input_tokens_details : :output_tokens_details
+          details = usage[container_key] || usage[container_key.to_s] || {}
+          return 0 unless details.is_a?(Hash)
+          (details[:image_tokens] || details["image_tokens"]).to_i
+        end
+        def record_passthrough(provider:, model:, response:, latency_ms:, input_tokens:, output_tokens:,
+                               image_input_tokens: 0, image_output_tokens: 0)
+          return unless active?
+          record_safely do
+            LlmCostTracker::Tracker.record(
+              capture: UsageCapture.build(
+                provider: provider,
+                model: model,
+                token_usage: TokenUsage.build(
+                  input_tokens: input_tokens,
+                  output_tokens: output_tokens,
+                  image_input_tokens: image_input_tokens,
+                  image_output_tokens: image_output_tokens
+                ),
+                usage_source: :sdk_response,
+                provider_response_id: provider_response_id(response)
+              ),
+              latency_ms: latency_ms
+            )
+          end
+        end
         def record_usage(provider:, model:, response:, latency_ms:, stream:, output_tokens: nil)
           return unless active?
@@ -80,7 +143,7 @@ module LlmCostTracker
               capture: UsageCapture.build(
                 provider: provider,
                 model: model,
-                pricing_mode: pricing_mode(response),
+                pricing_mode: pricing_mode(provider: provider, response: response),
                 token_usage: TokenUsage.build(
                   input_tokens: regular_input_tokens(input_tokens, cache_read),
                   output_tokens: output_tokens.to_i,
@@ -122,10 +185,16 @@ module LlmCostTracker
           object_value(response, :id, :provider_response_id) || object_dig(response, :raw, :id)
         end
-        def pricing_mode(response)
-          object_value(response, :pricing_mode, :service_tier) ||
-            object_dig(response, :raw, :pricing_mode) ||
-            object_dig(response, :raw, :service_tier)
+        ANTHROPIC_STANDARD_EQUIVALENT_SERVICE_TIERS = %w[standard standard_only priority].freeze
+        private_constant :ANTHROPIC_STANDARD_EQUIVALENT_SERVICE_TIERS
+        def pricing_mode(provider:, response:)
+          raw = object_value(response, :pricing_mode, :service_tier) ||
+                object_dig(response, :raw, :pricing_mode) ||
+                object_dig(response, :raw, :service_tier)
+          return nil if provider == "anthropic" && ANTHROPIC_STANDARD_EQUIVALENT_SERVICE_TIERS.include?(raw.to_s)
+          raw
         end
       end
@@ -175,6 +244,36 @@ module LlmCostTracker
           )
           response
         end
+        def paint(*args, **kwargs)
+          integration = LlmCostTracker::Integrations::RubyLlm
+          request = integration.request_params(args, kwargs)
+          integration.enforce_budget!
+          started_at = LlmCostTracker::Timing.now_monotonic
+          response = super
+          integration.record_image(
+            self,
+            response,
+            request: request,
+            latency_ms: integration.elapsed_ms(started_at)
+          )
+          response
+        end
+        def moderate(*args, **kwargs)
+          integration = LlmCostTracker::Integrations::RubyLlm
+          request = integration.request_params(args, kwargs)
+          integration.enforce_budget!
+          started_at = LlmCostTracker::Timing.now_monotonic
+          response = super
+          integration.record_moderation(
+            self,
+            response,
+            request: request,
+            latency_ms: integration.elapsed_ms(started_at)
+          )
+          response
+        end
       end
     end
   end

data/lib/llm_cost_tracker/integrations.rb CHANGED Viewed

@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 require_relative "errors"
+require_relative "logging"
 require_relative "integrations/openai"
 require_relative "integrations/anthropic"
 require_relative "integrations/ruby_llm"
@@ -13,10 +14,14 @@ module LlmCostTracker
       ruby_llm: RubyLlm
     }.freeze
+    DOUBLE_INSTRUMENTATION_OVERLAPS = %i[openai anthropic].freeze
     module_function
     def install!(names = LlmCostTracker.configuration.instrumented_integrations)
-      normalize(names).each { |name| fetch(name).install }
+      normalized = normalize(names)
+      warn_double_instrumentation(normalized)
+      normalized.each { |name| fetch(name).install }
     end
     def checks(names = LlmCostTracker.configuration.instrumented_integrations)
@@ -29,6 +34,19 @@ module LlmCostTracker
       Array(names).flatten.uniq
     end
+    def warn_double_instrumentation(names)
+      return unless names.include?(:ruby_llm)
+      overlapping = names & DOUBLE_INSTRUMENTATION_OVERLAPS
+      return if overlapping.empty?
+      Logging.warn(
+        ":ruby_llm is enabled together with #{overlapping.map(&:inspect).join(', ')}. " \
+        "RubyLLM uses HTTP underneath, so calls routed to those providers may be recorded twice " \
+        "(once via the SDK patch, once via the Faraday parser). Pick one path per provider."
+      )
+    end
     def fetch(name)
       AVAILABLE.fetch(name) do
         message = "Unknown integration: #{name.inspect}. Use one of: #{names.join(', ')}"