RubyGems - llm_cost_tracker - Versions diffs - 0.10.0 → 0.12.0 - Mend

llm_cost_tracker 0.10.0 → 0.12.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (209)

data/lib/llm_cost_tracker/integrations/openai.rb CHANGED Viewed

@@ -1,23 +1,25 @@
 # frozen_string_literal: true
 require_relative "base"
-require_relative "../billing/line_item"
-require_relative "../parsers/openai_service_charges"
+require_relative "../capture/sdk_payload"
+require_relative "../charges/line_item"
 require_relative "../providers/azure/hosts"
 require_relative "../providers/openai/model_families"
+require_relative "../providers/openai/service_charges"
+require_relative "../providers/openai/usage_extractor"
+require_relative "openai/patches"
+require_relative "openai/batch_capture"
 module LlmCostTracker
   module Integrations
-    module Openai # rubocop:disable Metrics/ModuleLength
+    module Openai
       extend Base
-      class << self
-        def integration_name
-          :openai
-        end
+      minimum_version "0.59.0"
+      class << self
         def stream_pricing_mode(request, host: nil)
-          LlmCostTracker::Parsers::OpenaiUsage.combined_pricing_mode(
+          LlmCostTracker::Providers::Openai::ResponseParser.combined_pricing_mode(
             host: host,
             model: (request || {})[:model],
             service_tier: (request || {})[:service_tier]
@@ -33,13 +35,12 @@ module LlmCostTracker
           )
         end
-        def wrap_stream_call(args, kwargs, resource)
-          request = request_params(args, kwargs)
-          enforce_budget!(request: request)
+        def stream_seam(resource)
           host = client_host_for(resource)
-          collector = stream_collector(request, host: host)
-          stream = yield(normalize_sdk_args(args, kwargs), collector)
-          track_stream(stream, collector: collector)
+          {
+            provider: provider_for_host(host),
+            collector: ->(request) { stream_collector(request, host: host) }
+          }
         end
         def client_host_for(resource)
@@ -55,46 +56,31 @@ module LlmCostTracker
           LlmCostTracker::Providers::Azure::Hosts.openai?(host) ? "azure_openai" : "openai"
         end
-        def minimum_version
-          "0.59.0"
-        end
-        def version_constant
-          "OpenAI::VERSION"
-        end
         def patch_targets
           [
-            patch_target("OpenAI::Resources::Responses",
-                         with: ResponsesPatch, methods: %i[create stream stream_raw retrieve_streaming]),
-            patch_target("OpenAI::Resources::Chat::Completions",
-                         with: ChatCompletionsPatch, methods: %i[create stream stream_raw]),
+            patch_target("OpenAI::Resources::Responses", with: ResponsesPatch),
+            patch_target("OpenAI::Resources::Chat::Completions", with: ChatCompletionsPatch),
             *auxiliary_patch_targets
           ]
         end
         def auxiliary_patch_targets
           [
-            patch_target("OpenAI::Resources::Embeddings",
-                         with: EmbeddingsPatch, methods: %i[create], optional: true),
-            patch_target("OpenAI::Resources::Images",
-                         with: ImagesPatch, methods: %i[generate edit create_variation], optional: true),
+            patch_target("OpenAI::Resources::Embeddings", with: EmbeddingsPatch, optional: true),
+            patch_target("OpenAI::Resources::Images", with: ImagesPatch, optional: true),
             patch_target("OpenAI::Resources::Images",
                          with: StreamingImagesPatch,
-                         methods: %i[generate_stream_raw edit_stream_raw],
-                         optional: true, skip_when_methods_missing: true),
-            patch_target("OpenAI::Resources::Audio::Transcriptions",
-                         with: TranscriptionsPatch, methods: %i[create], optional: true),
+                                                                               optional: true,
+                         skip_when_methods_missing: true),
+            patch_target("OpenAI::Resources::Audio::Transcriptions", with: TranscriptionsPatch, optional: true),
             patch_target("OpenAI::Resources::Audio::Transcriptions",
                          with: StreamingTranscriptionsPatch,
-                         methods: %i[create_streaming],
-                         optional: true, skip_when_methods_missing: true),
-            patch_target("OpenAI::Resources::Audio::Translations",
-                         with: TranslationsPatch, methods: %i[create], optional: true),
-            patch_target("OpenAI::Resources::Audio::Speech",
-                         with: SpeechPatch, methods: %i[create], optional: true),
-            patch_target("OpenAI::Resources::Moderations",
-                         with: ModerationsPatch, methods: %i[create], optional: true)
+                                                                                              optional: true,
+                         skip_when_methods_missing: true),
+            patch_target("OpenAI::Resources::Audio::Translations", with: TranslationsPatch, optional: true),
+            patch_target("OpenAI::Resources::Audio::Speech", with: SpeechPatch, optional: true),
+            patch_target("OpenAI::Resources::Moderations", with: ModerationsPatch, optional: true),
+            patch_target("OpenAI::Resources::Batches", with: BatchesPatch, optional: true)
           ]
         end
@@ -102,48 +88,43 @@ module LlmCostTracker
           return unless active?
           record_safely do
-            usage = object_value(response, :usage)
-            next unless usage
-            input_tokens = object_value(usage, :input_tokens, :prompt_tokens)
-            output_tokens = object_value(usage, :output_tokens, :completion_tokens)
-            next if input_tokens.nil? && output_tokens.nil?
-            cache_read = cache_read_input_tokens(usage)
-            model = object_value(response, :model) || request[:model]
-            LlmCostTracker::Tracker.record(
-              event: Event.build(
-                provider: provider_for_host(host),
-                model: model,
-                pricing_mode: LlmCostTracker::Parsers::OpenaiUsage.combined_pricing_mode(
-                  host: host,
-                  model: model,
-                  service_tier: object_value(response, :service_tier) || request[:service_tier]
-                ),
-                token_usage: token_usage(usage:, input_tokens:, output_tokens:, cache_read:, model: model),
-                usage_source: :sdk_response,
-                provider_response_id: object_value(response, :id),
-                service_line_items: service_line_items_from(response, request: request)
-              ),
-              latency_ms: latency_ms
+            normalized = LlmCostTracker::Capture::SdkPayload.normalize(response)
+            usage = normalized["usage"]
+            if usage
+              input_tokens = usage["input_tokens"] || usage["prompt_tokens"]
+              output_tokens = usage["output_tokens"] || usage["completion_tokens"]
+              next if input_tokens.nil? && output_tokens.nil?
+            end
+            event = LlmCostTracker::Providers::Openai::ResponseParser.event_from_response(
+              response: normalized,
+              request: request,
+              provider: provider_for_host(host),
+              host: host,
+              usage_source: LlmCostTracker::Usage::Source::SDK_RESPONSE
             )
+            LlmCostTracker::Tracker.record(event: event, latency_ms: latency_ms) if event
           end
         end
         def record_image(response, request:, latency_ms:, host: nil)
-          usage = object_value(response, :usage)
-          raw_input = usage ? object_value(usage, :input_tokens).to_i : 0
-          raw_output = usage ? object_value(usage, :output_tokens).to_i : 0
-          image_input = image_input_tokens(usage).to_i
-          cache_read = cache_read_input_tokens(usage).to_i
-          text_input = [raw_input - image_input - cache_read, 0].max
-          image_output, text_output = split_image_output(usage, raw_output)
+          usage = usage_hash_from(response) || {}
+          raw_input = usage[:input_tokens].to_i
+          image_input = LlmCostTracker::Providers::Openai::UsageExtractor.image_input_tokens(usage)
+          cache_read = LlmCostTracker::Providers::Openai::UsageExtractor.cache_read_input_tokens(usage)
+          image_output, text_output = LlmCostTracker::Providers::Openai::UsageExtractor.split_output(
+            output_tokens: usage[:output_tokens].to_i,
+            image_output_details: LlmCostTracker::Providers::Openai::UsageExtractor.image_output_tokens(usage),
+            text_output_details: LlmCostTracker::Providers::Openai::UsageExtractor.text_output_tokens(usage),
+            audio_output: 0,
+            default_to_image: true
+          )
           record_passthrough(
             model: request[:model],
             response: response,
             latency_ms: latency_ms,
             host: host,
-            input_tokens: text_input,
+            input_tokens: [raw_input - image_input - cache_read, 0].max,
             image_input_tokens: image_input,
             output_tokens: text_output,
             image_output_tokens: image_output,
@@ -151,34 +132,27 @@ module LlmCostTracker
           )
         end
-        def split_image_output(usage, raw_output)
-          image_tokens = image_output_tokens(usage).to_i
-          text_tokens = text_output_tokens(usage).to_i
-          return [raw_output, 0] if image_tokens.zero? && text_tokens.zero?
-          text_tokens = [raw_output - image_tokens, 0].max if text_tokens.zero?
-          [image_tokens, text_tokens]
-        end
         def record_transcription(response, request:, latency_ms:, host: nil)
+          usage = usage_hash_from(response)
           record_passthrough(
             model: request[:model],
             response: response,
             latency_ms: latency_ms,
             host: host,
-            **transcription_token_attributes(object_value(response, :usage))
+            service_line_items: LlmCostTracker::Providers::Openai::ServiceCharges.transcription_line_items(usage),
+            **transcription_token_attributes(usage)
           )
         end
         def transcription_token_attributes(usage)
-          return { input_tokens: 0, output_tokens: 0 } unless usage && object_value(usage, :type).to_s == "tokens"
+          return { input_tokens: 0, output_tokens: 0 } unless usage && usage[:type].to_s == "tokens"
-          raw_input = object_value(usage, :input_tokens).to_i
-          audio_input = object_dig(usage, :input_token_details, :audio_tokens).to_i
+          raw_input = usage[:input_tokens].to_i
+          audio_input = LlmCostTracker::Providers::Openai::UsageExtractor.audio_input_tokens(usage)
           {
             input_tokens: [raw_input - audio_input, 0].max,
             audio_input_tokens: audio_input,
-            output_tokens: object_value(usage, :output_tokens).to_i
+            output_tokens: usage[:output_tokens].to_i
           }
         end
@@ -199,18 +173,18 @@ module LlmCostTracker
           return [] unless input.is_a?(String)
           return [] unless LlmCostTracker::Providers::Openai::ModelFamilies.character_billed_tts?(request[:model])
-          [LlmCostTracker::Billing::LineItem.build(
-            component_key: :text_to_speech_character,
+          [LlmCostTracker::Charges::LineItem.build(
+            dimension_key: "text_to_speech_character",
             quantity: input.length,
-            cost_status: LlmCostTracker::Billing::CostStatus::UNKNOWN,
-            pricing_basis: :provider_usage,
+            cost_status: LlmCostTracker::Charges::CostStatus::UNKNOWN,
+            pricing_basis: "provider_usage",
             provider_field: "request.input"
           )]
         end
         def record_moderation(response, request:, latency_ms:, host: nil)
           record_passthrough(
-            model: object_value(response, :model) || request[:model],
+            model: response.model || request[:model],
             response: response,
             latency_ms: latency_ms,
             host: host,
@@ -227,9 +201,9 @@ module LlmCostTracker
               event: Event.build(
                 provider: provider_for_host(host),
                 model: model,
-                token_usage: TokenUsage.build(**token_attributes),
-                usage_source: :sdk_response,
-                provider_response_id: response && object_value(response, :id),
+                token_usage: Usage::TokenUsage.build(**token_attributes),
+                usage_source: LlmCostTracker::Usage::Source::SDK_RESPONSE,
+                provider_response_id: response&.try(:id),
                 service_line_items: service_line_items
               ),
               latency_ms: latency_ms
@@ -237,242 +211,8 @@ module LlmCostTracker
           end
         end
-        def service_line_items_from(response, request: nil)
-          model = object_value(response, :model) || request&.dig(:model)
-          output = object_value(response, :output)
-          output_items = output.respond_to?(:each) ? output.map { |item| normalize_output_item(item) }.compact : []
-          chat_search = output_items.empty? ? chat_completions_search_item(response, model: model) : nil
-          output_items << chat_search if chat_search
-          return [] if output_items.empty?
-          LlmCostTracker::Parsers::OpenaiServiceCharges.line_items_from_output(
-            output_items, request: request, model: model
-          )
-        end
-        def chat_completions_search_item(response, model: nil)
-          choices = object_value(response, :choices)
-          return nil unless choices.respond_to?(:any?)
-          provider_field = if choices.any? { |choice| choice_used_url_citation?(choice) }
-                             LlmCostTracker::Parsers::OpenaiServiceCharges::CHAT_COMPLETIONS_ANNOTATION_PROVIDER_FIELD
-                           elsif LlmCostTracker::Providers::Openai::ModelFamilies.chat_completions_search?(model)
-                             LlmCostTracker::Parsers::OpenaiServiceCharges::CHAT_COMPLETIONS_SEARCH_MODEL_PROVIDER_FIELD
-                           end
-          return nil unless provider_field
-          { "type" => "web_search_call", "id" => object_value(response, :id),
-            "action" => { "type" => "search" }, "provider_field" => provider_field }
-        end
-        def choice_used_url_citation?(choice)
-          message = object_value(choice, :message)
-          annotations = message && object_value(message, :annotations)
-          return false unless annotations.respond_to?(:any?)
-          annotations.any? { |annotation| object_value(annotation, :type).to_s == "url_citation" }
-        end
-        def normalize_output_item(item)
-          return item if item.is_a?(Hash)
-          return nil if item.nil?
-          {
-            "type" => object_value(item, :type)&.to_s,
-            "id" => object_value(item, :id),
-            "status" => object_value(item, :status),
-            "container_id" => object_value(item, :container_id),
-            "action" => normalize_output_action(object_value(item, :action))
-          }
-        end
-        def normalize_output_action(action)
-          return nil if action.nil?
-          return action if action.is_a?(Hash)
-          { "type" => object_value(action, :type)&.to_s }
-        end
-        def token_usage(usage:, input_tokens:, output_tokens:, cache_read:, model: nil)
-          audio_input = audio_input_tokens(usage)
-          audio_output = audio_output_tokens(usage)
-          image_input = image_input_tokens(usage)
-          image_output_details = image_output_tokens(usage)
-          text_output_details = text_output_tokens(usage)
-          image_output, regular_output = split_responses_image_output(
-            output_tokens: output_tokens.to_i,
-            image_output_details: image_output_details,
-            text_output_details: text_output_details,
-            audio_output: audio_output,
-            default_to_image: LlmCostTracker::Providers::Openai::ModelFamilies.image_output?(model)
-          )
-          TokenUsage.build(
-            input_tokens: regular_input_tokens(input_tokens, cache_read, audio_input, image_input),
-            output_tokens: regular_output,
-            cache_read_input_tokens: cache_read,
-            audio_input_tokens: audio_input,
-            audio_output_tokens: audio_output,
-            image_input_tokens: image_input,
-            image_output_tokens: image_output,
-            hidden_output_tokens: hidden_output_tokens(usage)
-          )
-        end
-        INPUT_DETAIL_KEYS = %i[input_tokens_details input_token_details prompt_tokens_details].freeze
-        OUTPUT_DETAIL_KEYS = %i[output_tokens_details output_token_details completion_tokens_details].freeze
-        def cache_read_input_tokens(usage) = detail(usage, INPUT_DETAIL_KEYS, :cached_tokens)
-        def hidden_output_tokens(usage)    = detail(usage, OUTPUT_DETAIL_KEYS, :reasoning_tokens)
-        def audio_input_tokens(usage)      = detail(usage, INPUT_DETAIL_KEYS, :audio_tokens)
-        def audio_output_tokens(usage)     = detail(usage, OUTPUT_DETAIL_KEYS, :audio_tokens)
-        def image_input_tokens(usage)      = detail(usage, INPUT_DETAIL_KEYS, :image_tokens)
-        def image_output_tokens(usage)     = detail(usage, OUTPUT_DETAIL_KEYS, :image_tokens)
-        def text_output_tokens(usage)      = detail(usage, OUTPUT_DETAIL_KEYS, :text_tokens)
-        def detail(usage, containers, key)
-          containers.each do |container|
-            value = object_dig(usage, container, key)
-            return value.to_i if value
-          end
-          0
-        end
-        def regular_input_tokens(input_tokens, cache_read, audio_input, image_input)
-          [input_tokens.to_i - cache_read - audio_input - image_input, 0].max
-        end
-        def split_responses_image_output(output_tokens:, image_output_details:, text_output_details:, audio_output:,
-                                         default_to_image: false)
-          if image_output_details.zero? && text_output_details.zero?
-            remainder = [output_tokens - audio_output, 0].max
-            return default_to_image ? [remainder, 0] : [0, remainder]
-          end
-          text_output = text_output_details
-          text_output = [output_tokens - image_output_details - audio_output, 0].max if text_output.zero?
-          [image_output_details, text_output]
-        end
-      end
-      module ResponsesPatch
-        def create(*args, **kwargs)
-          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
-          LlmCostTracker::Integrations::Openai.enforce_budget!(request: request)
-          started_at = LlmCostTracker::Timing.now_monotonic
-          response = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
-          LlmCostTracker::Integrations::Openai.record_response(
-            response,
-            request: request,
-            latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
-            host: LlmCostTracker::Integrations::Openai.client_host_for(self)
-          )
-          response
-        end
-        def stream(*args, **kwargs)
-          LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, _|
-            super(*normalized)
-          end
-        end
-        def stream_raw(*args, **kwargs)
-          LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, _|
-            super(*normalized)
-          end
-        end
-        def retrieve_streaming(response_id, *args, **kwargs)
-          LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, collector|
-            collector.provider_response_id = response_id
-            super(response_id, *normalized)
-          end
-        end
-      end
-      module ChatCompletionsPatch
-        def create(*args, **kwargs)
-          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
-          LlmCostTracker::Integrations::Openai.enforce_budget!(request: request)
-          started_at = LlmCostTracker::Timing.now_monotonic
-          response = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
-          LlmCostTracker::Integrations::Openai.record_response(
-            response,
-            request: request,
-            latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
-            host: LlmCostTracker::Integrations::Openai.client_host_for(self)
-          )
-          response
-        end
-        def stream(*args, **kwargs)
-          LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, _|
-            super(*normalized)
-          end
-        end
-        def stream_raw(*args, **kwargs)
-          LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, _|
-            super(*normalized)
-          end
-        end
-      end
-      module PatchBuilder
-        module_function
-        def build(record_method:, methods:)
-          Module.new.tap do |mod|
-            methods.each { |method_name| define_wrapped_method(mod, method_name, record_method) }
-          end
-        end
-        def define_wrapped_method(mod, method_name, record_method)
-          mod.define_method(method_name) do |*args, **kwargs, &block|
-            integration = LlmCostTracker::Integrations::Openai
-            request = integration.request_params(args, kwargs)
-            integration.enforce_budget!(request: request)
-            started_at = LlmCostTracker::Timing.now_monotonic
-            response = super(*integration.normalize_sdk_args(args, kwargs), &block)
-            integration.public_send(
-              record_method, response,
-              request: request,
-              latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
-              host: integration.client_host_for(self)
-            )
-            response
-          end
-        end
-      end
-      EmbeddingsPatch = PatchBuilder.build(record_method: :record_response, methods: %i[create])
-      ImagesPatch = PatchBuilder.build(record_method: :record_image, methods: %i[generate edit create_variation])
-      TranscriptionsPatch = PatchBuilder.build(record_method: :record_transcription, methods: %i[create])
-      TranslationsPatch = PatchBuilder.build(record_method: :record_transcription, methods: %i[create])
-      SpeechPatch = PatchBuilder.build(record_method: :record_speech, methods: %i[create])
-      ModerationsPatch = PatchBuilder.build(record_method: :record_moderation, methods: %i[create])
-      module StreamingImagesPatch
-        %i[generate_stream_raw edit_stream_raw].each do |method_name|
-          define_method(method_name) do |*args, **kwargs|
-            request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
-            LlmCostTracker::Integrations::Openai.enforce_budget!(request: request)
-            host = LlmCostTracker::Integrations::Openai.client_host_for(self)
-            collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
-            stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
-            LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
-          end
-        end
-      end
-      module StreamingTranscriptionsPatch
-        def create_streaming(*args, **kwargs)
-          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
-          LlmCostTracker::Integrations::Openai.enforce_budget!(request: request)
-          host = LlmCostTracker::Integrations::Openai.client_host_for(self)
-          collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
-          stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
-          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+        def usage_hash_from(response)
+          response.try(:usage)&.deep_to_h
         end
       end
     end