RubyGems - llm_cost_tracker - Versions diffs - 0.8.0 → 0.10.0 - Mend

llm_cost_tracker 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (150)

data/lib/llm_cost_tracker/integrations/openai.rb CHANGED

@@ -3,10 +3,12 @@
 require_relative "base"
 require_relative "../billing/line_item"
 require_relative "../parsers/openai_service_charges"
+require_relative "../providers/azure/hosts"
+require_relative "../providers/openai/model_families"
 module LlmCostTracker
   module Integrations
-    module Openai
+    module Openai # rubocop:disable Metrics/ModuleLength
       extend Base
       class << self
@@ -14,8 +16,43 @@ module LlmCostTracker
           :openai
         end
-        def stream_pricing_mode(request)
-          Pricing.normalize_mode((request || {})[:service_tier])
+        def stream_pricing_mode(request, host: nil)
+          LlmCostTracker::Parsers::OpenaiUsage.combined_pricing_mode(
+            host: host,
+            model: (request || {})[:model],
+            service_tier: (request || {})[:service_tier]
+          )
+        end
+        def stream_collector(request, host: nil)
+          LlmCostTracker::Capture::StreamCollector.new(
+            provider: provider_for_host(host),
+            model: request[:model],
+            pricing_mode: stream_pricing_mode(request, host: host),
+            request: request
+          )
+        end
+        def wrap_stream_call(args, kwargs, resource)
+          request = request_params(args, kwargs)
+          enforce_budget!(request: request)
+          host = client_host_for(resource)
+          collector = stream_collector(request, host: host)
+          stream = yield(normalize_sdk_args(args, kwargs), collector)
+          track_stream(stream, collector: collector)
+        end
+        def client_host_for(resource)
+          client = resource.instance_variable_get(:@client)
+          return nil unless client
+          URI.parse(client.base_url.to_s).host
+        rescue URI::InvalidURIError
+          nil
+        end
+        def provider_for_host(host)
+          LlmCostTracker::Providers::Azure::Hosts.openai?(host) ? "azure_openai" : "openai"
         end
         def minimum_version
@@ -28,20 +65,40 @@ module LlmCostTracker
         def patch_targets
           [
-            patch_target(
-              "OpenAI::Resources::Responses",
-              with: ResponsesPatch,
-              methods: %i[create stream stream_raw retrieve_streaming]
-            ),
-            patch_target(
-              "OpenAI::Resources::Chat::Completions",
-              with: ChatCompletionsPatch,
-              methods: %i[create stream_raw]
-            )
+            patch_target("OpenAI::Resources::Responses",
+                         with: ResponsesPatch, methods: %i[create stream stream_raw retrieve_streaming]),
+            patch_target("OpenAI::Resources::Chat::Completions",
+                         with: ChatCompletionsPatch, methods: %i[create stream stream_raw]),
+            *auxiliary_patch_targets
+          ]
+        end
+        def auxiliary_patch_targets
+          [
+            patch_target("OpenAI::Resources::Embeddings",
+                         with: EmbeddingsPatch, methods: %i[create], optional: true),
+            patch_target("OpenAI::Resources::Images",
+                         with: ImagesPatch, methods: %i[generate edit create_variation], optional: true),
+            patch_target("OpenAI::Resources::Images",
+                         with: StreamingImagesPatch,
+                         methods: %i[generate_stream_raw edit_stream_raw],
+                         optional: true, skip_when_methods_missing: true),
+            patch_target("OpenAI::Resources::Audio::Transcriptions",
+                         with: TranscriptionsPatch, methods: %i[create], optional: true),
+            patch_target("OpenAI::Resources::Audio::Transcriptions",
+                         with: StreamingTranscriptionsPatch,
+                         methods: %i[create_streaming],
+                         optional: true, skip_when_methods_missing: true),
+            patch_target("OpenAI::Resources::Audio::Translations",
+                         with: TranslationsPatch, methods: %i[create], optional: true),
+            patch_target("OpenAI::Resources::Audio::Speech",
+                         with: SpeechPatch, methods: %i[create], optional: true),
+            patch_target("OpenAI::Resources::Moderations",
+                         with: ModerationsPatch, methods: %i[create], optional: true)
           ]
         end
-        def record_response(response, request:, latency_ms:)
+        def record_response(response, request:, latency_ms:, host: nil)
           return unless active?
           record_safely do
@@ -53,27 +110,167 @@ module LlmCostTracker
             next if input_tokens.nil? && output_tokens.nil?
             cache_read = cache_read_input_tokens(usage)
+            model = object_value(response, :model) || request[:model]
             LlmCostTracker::Tracker.record(
-              capture: UsageCapture.build(
-                provider: "openai",
-                model: object_value(response, :model) || request[:model],
-                pricing_mode: object_value(response, :service_tier) || request[:service_tier],
-                token_usage: token_usage(usage:, input_tokens:, output_tokens:, cache_read:),
+              event: Event.build(
+                provider: provider_for_host(host),
+                model: model,
+                pricing_mode: LlmCostTracker::Parsers::OpenaiUsage.combined_pricing_mode(
+                  host: host,
+                  model: model,
+                  service_tier: object_value(response, :service_tier) || request[:service_tier]
+                ),
+                token_usage: token_usage(usage:, input_tokens:, output_tokens:, cache_read:, model: model),
                 usage_source: :sdk_response,
                 provider_response_id: object_value(response, :id),
-                service_line_items: service_line_items_from(response)
+                service_line_items: service_line_items_from(response, request: request)
+              ),
+              latency_ms: latency_ms
+            )
+          end
+        end
+        def record_image(response, request:, latency_ms:, host: nil)
+          usage = object_value(response, :usage)
+          raw_input = usage ? object_value(usage, :input_tokens).to_i : 0
+          raw_output = usage ? object_value(usage, :output_tokens).to_i : 0
+          image_input = image_input_tokens(usage).to_i
+          cache_read = cache_read_input_tokens(usage).to_i
+          text_input = [raw_input - image_input - cache_read, 0].max
+          image_output, text_output = split_image_output(usage, raw_output)
+          record_passthrough(
+            model: request[:model],
+            response: response,
+            latency_ms: latency_ms,
+            host: host,
+            input_tokens: text_input,
+            image_input_tokens: image_input,
+            output_tokens: text_output,
+            image_output_tokens: image_output,
+            cache_read_input_tokens: cache_read
+          )
+        end
+        def split_image_output(usage, raw_output)
+          image_tokens = image_output_tokens(usage).to_i
+          text_tokens = text_output_tokens(usage).to_i
+          return [raw_output, 0] if image_tokens.zero? && text_tokens.zero?
+          text_tokens = [raw_output - image_tokens, 0].max if text_tokens.zero?
+          [image_tokens, text_tokens]
+        end
+        def record_transcription(response, request:, latency_ms:, host: nil)
+          record_passthrough(
+            model: request[:model],
+            response: response,
+            latency_ms: latency_ms,
+            host: host,
+            **transcription_token_attributes(object_value(response, :usage))
+          )
+        end
+        def transcription_token_attributes(usage)
+          return { input_tokens: 0, output_tokens: 0 } unless usage && object_value(usage, :type).to_s == "tokens"
+          raw_input = object_value(usage, :input_tokens).to_i
+          audio_input = object_dig(usage, :input_token_details, :audio_tokens).to_i
+          {
+            input_tokens: [raw_input - audio_input, 0].max,
+            audio_input_tokens: audio_input,
+            output_tokens: object_value(usage, :output_tokens).to_i
+          }
+        end
+        def record_speech(_response, request:, latency_ms:, host: nil)
+          record_passthrough(
+            model: request[:model],
+            response: nil,
+            latency_ms: latency_ms,
+            host: host,
+            input_tokens: 0,
+            output_tokens: 0,
+            service_line_items: speech_line_items(request)
+          )
+        end
+        def speech_line_items(request)
+          input = request[:input]
+          return [] unless input.is_a?(String)
+          return [] unless LlmCostTracker::Providers::Openai::ModelFamilies.character_billed_tts?(request[:model])
+          [LlmCostTracker::Billing::LineItem.build(
+            component_key: :text_to_speech_character,
+            quantity: input.length,
+            cost_status: LlmCostTracker::Billing::CostStatus::UNKNOWN,
+            pricing_basis: :provider_usage,
+            provider_field: "request.input"
+          )]
+        end
+        def record_moderation(response, request:, latency_ms:, host: nil)
+          record_passthrough(
+            model: object_value(response, :model) || request[:model],
+            response: response,
+            latency_ms: latency_ms,
+            host: host,
+            input_tokens: 0,
+            output_tokens: 0
+          )
+        end
+        def record_passthrough(model:, response:, latency_ms:, host: nil, service_line_items: [], **token_attributes)
+          return unless active?
+          record_safely do
+            LlmCostTracker::Tracker.record(
+              event: Event.build(
+                provider: provider_for_host(host),
+                model: model,
+                token_usage: TokenUsage.build(**token_attributes),
+                usage_source: :sdk_response,
+                provider_response_id: response && object_value(response, :id),
+                service_line_items: service_line_items
               ),
               latency_ms: latency_ms
             )
           end
         end
-        def service_line_items_from(response)
+        def service_line_items_from(response, request: nil)
+          model = object_value(response, :model) || request&.dig(:model)
           output = object_value(response, :output)
-          return [] unless output.respond_to?(:each)
+          output_items = output.respond_to?(:each) ? output.map { |item| normalize_output_item(item) }.compact : []
+          chat_search = output_items.empty? ? chat_completions_search_item(response, model: model) : nil
+          output_items << chat_search if chat_search
+          return [] if output_items.empty?
+          LlmCostTracker::Parsers::OpenaiServiceCharges.line_items_from_output(
+            output_items, request: request, model: model
+          )
+        end
+        def chat_completions_search_item(response, model: nil)
+          choices = object_value(response, :choices)
+          return nil unless choices.respond_to?(:any?)
-          LlmCostTracker::Parsers::OpenaiServiceCharges
-            .line_items_from_output(output.map { |item| normalize_output_item(item) })
+          provider_field = if choices.any? { |choice| choice_used_url_citation?(choice) }
+                             LlmCostTracker::Parsers::OpenaiServiceCharges::CHAT_COMPLETIONS_ANNOTATION_PROVIDER_FIELD
+                           elsif LlmCostTracker::Providers::Openai::ModelFamilies.chat_completions_search?(model)
+                             LlmCostTracker::Parsers::OpenaiServiceCharges::CHAT_COMPLETIONS_SEARCH_MODEL_PROVIDER_FIELD
+                           end
+          return nil unless provider_field
+          { "type" => "web_search_call", "id" => object_value(response, :id),
+            "action" => { "type" => "search" }, "provider_field" => provider_field }
+        end
+        def choice_used_url_citation?(choice)
+          message = object_value(choice, :message)
+          annotations = message && object_value(message, :annotations)
+          return false unless annotations.respond_to?(:any?)
+          annotations.any? { |annotation| object_value(annotation, :type).to_s == "url_citation" }
         end
         def normalize_output_item(item)
@@ -81,7 +278,7 @@ module LlmCostTracker
           return nil if item.nil?
           {
-            "type" => object_value(item, :type),
+            "type" => object_value(item, :type)&.to_s,
             "id" => object_value(item, :id),
             "status" => object_value(item, :status),
             "container_id" => object_value(item, :container_id),
@@ -93,19 +290,31 @@ module LlmCostTracker
           return nil if action.nil?
           return action if action.is_a?(Hash)
-          { "type" => object_value(action, :type) }
+          { "type" => object_value(action, :type)&.to_s }
         end
-        def token_usage(usage:, input_tokens:, output_tokens:, cache_read:)
+        def token_usage(usage:, input_tokens:, output_tokens:, cache_read:, model: nil)
           audio_input = audio_input_tokens(usage)
           audio_output = audio_output_tokens(usage)
+          image_input = image_input_tokens(usage)
+          image_output_details = image_output_tokens(usage)
+          text_output_details = text_output_tokens(usage)
+          image_output, regular_output = split_responses_image_output(
+            output_tokens: output_tokens.to_i,
+            image_output_details: image_output_details,
+            text_output_details: text_output_details,
+            audio_output: audio_output,
+            default_to_image: LlmCostTracker::Providers::Openai::ModelFamilies.image_output?(model)
+          )
           TokenUsage.build(
-            input_tokens: regular_input_tokens(input_tokens, cache_read, audio_input),
-            output_tokens: regular_output_tokens(output_tokens, audio_output),
+            input_tokens: regular_input_tokens(input_tokens, cache_read, audio_input, image_input),
+            output_tokens: regular_output,
             cache_read_input_tokens: cache_read,
             audio_input_tokens: audio_input,
             audio_output_tokens: audio_output,
+            image_input_tokens: image_input,
+            image_output_tokens: image_output,
             hidden_output_tokens: hidden_output_tokens(usage)
           )
         end
@@ -113,104 +322,156 @@ module LlmCostTracker
         INPUT_DETAIL_KEYS = %i[input_tokens_details input_token_details prompt_tokens_details].freeze
         OUTPUT_DETAIL_KEYS = %i[output_tokens_details output_token_details completion_tokens_details].freeze
-        def cache_read_input_tokens(usage)
-          input_detail(usage, :cached_tokens)
-        end
-        def hidden_output_tokens(usage)
-          output_detail(usage, :reasoning_tokens)
-        end
-        def audio_input_tokens(usage)
-          input_detail(usage, :audio_tokens)
-        end
-        def audio_output_tokens(usage)
-          output_detail(usage, :audio_tokens)
-        end
+        def cache_read_input_tokens(usage) = detail(usage, INPUT_DETAIL_KEYS, :cached_tokens)
+        def hidden_output_tokens(usage)    = detail(usage, OUTPUT_DETAIL_KEYS, :reasoning_tokens)
+        def audio_input_tokens(usage)      = detail(usage, INPUT_DETAIL_KEYS, :audio_tokens)
+        def audio_output_tokens(usage)     = detail(usage, OUTPUT_DETAIL_KEYS, :audio_tokens)
+        def image_input_tokens(usage)      = detail(usage, INPUT_DETAIL_KEYS, :image_tokens)
+        def image_output_tokens(usage)     = detail(usage, OUTPUT_DETAIL_KEYS, :image_tokens)
+        def text_output_tokens(usage)      = detail(usage, OUTPUT_DETAIL_KEYS, :text_tokens)
-        def input_detail(usage, key)
-          INPUT_DETAIL_KEYS.each do |container|
+        def detail(usage, containers, key)
+          containers.each do |container|
             value = object_dig(usage, container, key)
             return value.to_i if value
           end
           0
         end
-        def output_detail(usage, key)
-          OUTPUT_DETAIL_KEYS.each do |container|
-            value = object_dig(usage, container, key)
-            return value.to_i if value
-          end
-          0
+        def regular_input_tokens(input_tokens, cache_read, audio_input, image_input)
+          [input_tokens.to_i - cache_read - audio_input - image_input, 0].max
         end
-        def regular_input_tokens(input_tokens, cache_read, audio_input)
-          [input_tokens.to_i - cache_read - audio_input, 0].max
-        end
+        def split_responses_image_output(output_tokens:, image_output_details:, text_output_details:, audio_output:,
+                                         default_to_image: false)
+          if image_output_details.zero? && text_output_details.zero?
+            remainder = [output_tokens - audio_output, 0].max
+            return default_to_image ? [remainder, 0] : [0, remainder]
+          end
-        def regular_output_tokens(output_tokens, audio_output)
-          [output_tokens.to_i - audio_output, 0].max
+          text_output = text_output_details
+          text_output = [output_tokens - image_output_details - audio_output, 0].max if text_output.zero?
+          [image_output_details, text_output]
         end
       end
       module ResponsesPatch
         def create(*args, **kwargs)
-          LlmCostTracker::Integrations::Openai.enforce_budget!
+          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
+          LlmCostTracker::Integrations::Openai.enforce_budget!(request: request)
           started_at = LlmCostTracker::Timing.now_monotonic
-          response = super
+          response = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
           LlmCostTracker::Integrations::Openai.record_response(
             response,
-            request: LlmCostTracker::Integrations::Openai.request_params(args, kwargs),
-            latency_ms: LlmCostTracker::Integrations::Openai.elapsed_ms(started_at)
+            request: request,
+            latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
+            host: LlmCostTracker::Integrations::Openai.client_host_for(self)
           )
           response
         end
         def stream(*args, **kwargs)
-          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
-          LlmCostTracker::Integrations::Openai.enforce_budget!
-          collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
-          stream = super
-          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+          LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, _|
+            super(*normalized)
+          end
         end
         def stream_raw(*args, **kwargs)
-          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
-          LlmCostTracker::Integrations::Openai.enforce_budget!
-          collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
-          stream = super
-          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+          LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, _|
+            super(*normalized)
+          end
         end
         def retrieve_streaming(response_id, *args, **kwargs)
-          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
-          LlmCostTracker::Integrations::Openai.enforce_budget!
-          collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
-          collector.provider_response_id = response_id
-          stream = super
-          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+          LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, collector|
+            collector.provider_response_id = response_id
+            super(response_id, *normalized)
+          end
         end
       end
       module ChatCompletionsPatch
         def create(*args, **kwargs)
-          LlmCostTracker::Integrations::Openai.enforce_budget!
+          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
+          LlmCostTracker::Integrations::Openai.enforce_budget!(request: request)
           started_at = LlmCostTracker::Timing.now_monotonic
-          response = super
+          response = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
           LlmCostTracker::Integrations::Openai.record_response(
             response,
-            request: LlmCostTracker::Integrations::Openai.request_params(args, kwargs),
-            latency_ms: LlmCostTracker::Integrations::Openai.elapsed_ms(started_at)
+            request: request,
+            latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
+            host: LlmCostTracker::Integrations::Openai.client_host_for(self)
           )
           response
         end
+        def stream(*args, **kwargs)
+          LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, _|
+            super(*normalized)
+          end
+        end
         def stream_raw(*args, **kwargs)
+          LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, _|
+            super(*normalized)
+          end
+        end
+      end
+      module PatchBuilder
+        module_function
+        def build(record_method:, methods:)
+          Module.new.tap do |mod|
+            methods.each { |method_name| define_wrapped_method(mod, method_name, record_method) }
+          end
+        end
+        def define_wrapped_method(mod, method_name, record_method)
+          mod.define_method(method_name) do |*args, **kwargs, &block|
+            integration = LlmCostTracker::Integrations::Openai
+            request = integration.request_params(args, kwargs)
+            integration.enforce_budget!(request: request)
+            started_at = LlmCostTracker::Timing.now_monotonic
+            response = super(*integration.normalize_sdk_args(args, kwargs), &block)
+            integration.public_send(
+              record_method, response,
+              request: request,
+              latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
+              host: integration.client_host_for(self)
+            )
+            response
+          end
+        end
+      end
+      EmbeddingsPatch = PatchBuilder.build(record_method: :record_response, methods: %i[create])
+      ImagesPatch = PatchBuilder.build(record_method: :record_image, methods: %i[generate edit create_variation])
+      TranscriptionsPatch = PatchBuilder.build(record_method: :record_transcription, methods: %i[create])
+      TranslationsPatch = PatchBuilder.build(record_method: :record_transcription, methods: %i[create])
+      SpeechPatch = PatchBuilder.build(record_method: :record_speech, methods: %i[create])
+      ModerationsPatch = PatchBuilder.build(record_method: :record_moderation, methods: %i[create])
+      module StreamingImagesPatch
+        %i[generate_stream_raw edit_stream_raw].each do |method_name|
+          define_method(method_name) do |*args, **kwargs|
+            request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
+            LlmCostTracker::Integrations::Openai.enforce_budget!(request: request)
+            host = LlmCostTracker::Integrations::Openai.client_host_for(self)
+            collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
+            stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
+            LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+          end
+        end
+      end
+      module StreamingTranscriptionsPatch
+        def create_streaming(*args, **kwargs)
           request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
-          LlmCostTracker::Integrations::Openai.enforce_budget!
-          collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
-          stream = super
+          LlmCostTracker::Integrations::Openai.enforce_budget!(request: request)
+          host = LlmCostTracker::Integrations::Openai.client_host_for(self)
+          collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
+          stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
           LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
         end
       end