RubyGems - llm_cost_tracker - Versions diffs - 0.9.0 → 0.10.0 - Mend

llm_cost_tracker 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104)

data/lib/llm_cost_tracker/integrations/anthropic.rb CHANGED Viewed

@@ -2,6 +2,7 @@
 require_relative "base"
 require_relative "../billing/line_item"
+require_relative "../providers/anthropic/tier_classification"
 module LlmCostTracker
   module Integrations
@@ -45,10 +46,10 @@ module LlmCostTracker
             next if input_tokens.nil? && output_tokens.nil?
             LlmCostTracker::Tracker.record(
-              capture: UsageCapture.build(
+              event: Event.build(
                 provider: "anthropic",
                 model: object_value(message, :model) || request[:model],
-                pricing_mode: pricing_mode(message: message, request: request, usage: usage),
+                pricing_mode: pricing_mode(request: request, usage: usage),
                 token_usage: token_usage(usage: usage, input_tokens: input_tokens, output_tokens: output_tokens),
                 usage_source: :sdk_response,
                 provider_response_id: object_value(message, :id),
@@ -74,7 +75,7 @@ module LlmCostTracker
         end
         def line_item_for_server_tool(server_tool_use, component_key, count_key, provider_field)
-          quantity = server_tool_count(server_tool_use, count_key)
+          quantity = object_value(server_tool_use, count_key).to_i
           return nil if quantity.zero?
           Billing::LineItem.build(
@@ -86,14 +87,6 @@ module LlmCostTracker
           )
         end
-        def server_tool_count(server_tool_use, count_key)
-          direct = object_value(server_tool_use, count_key).to_i
-          return direct if direct.positive?
-          return 0 unless server_tool_use.respond_to?(:to_h)
-          server_tool_use.to_h[count_key].to_i
-        end
         def token_usage(usage:, input_tokens:, output_tokens:)
           cache_creation = object_value(usage, :cache_creation)
           if cache_creation
@@ -118,65 +111,58 @@ module LlmCostTracker
           )
         end
-        DATA_RESIDENCY_GEOS = %w[us].freeze
-        # Anthropic Priority Tier is committed throughput (tokens/min capacity), not a per-token
-        # surcharge. Treat it as standard pricing so cost_status doesn't fall to :unknown.
-        STANDARD_EQUIVALENT_SERVICE_TIERS = %w[standard standard_only priority].freeze
-        def pricing_mode(message:, request:, usage:)
-          service_tier = object_value(usage, :service_tier) ||
-                         object_value(message, :service_tier) ||
-                         request[:service_tier]
-          service_tier = nil if STANDARD_EQUIVALENT_SERVICE_TIERS.include?(service_tier.to_s)
+        def pricing_mode(request:, usage:)
+          service_tier = object_value(usage, :service_tier) || request[:service_tier]
+          tier = Providers::Anthropic::TierClassification
+          service_tier = nil if tier.standard_equivalent_tier?(service_tier)
           modes = [
-            Pricing.normalize_mode(object_value(usage, :speed) || object_value(message, :speed) || request[:speed]),
+            Pricing.normalize_mode(object_value(usage, :speed) || request[:speed]),
             Pricing.normalize_mode(service_tier)
           ]
-          geo = inference_geo(message: message, request: request, usage: usage).to_s.downcase
-          modes << "data_residency" if DATA_RESIDENCY_GEOS.include?(geo)
+          geo = inference_geo(request: request, usage: usage).to_s.downcase
+          modes << "data_residency" if tier.data_residency_geo?(geo)
           modes = modes.compact.uniq
           modes.empty? ? nil : modes.join("_")
         end
         def stream_pricing_mode(request)
-          pricing_mode(message: nil, request: request || {}, usage: nil)
+          pricing_mode(request: request || {}, usage: nil)
         end
-        def inference_geo(message:, request:, usage:)
-          object_value(usage, :inference_geo) ||
-            object_value(message, :inference_geo) ||
-            request[:inference_geo]
+        def inference_geo(request:, usage:)
+          object_value(usage, :inference_geo) || request[:inference_geo]
+        end
+        def wrap_stream_call(args, kwargs)
+          request = request_params(args, kwargs)
+          enforce_budget!(request: request)
+          collector = stream_collector(request)
+          stream = yield
+          track_stream(stream, collector: collector)
         end
       end
       module MessagesPatch
         def create(*args, **kwargs)
-          LlmCostTracker::Integrations::Anthropic.enforce_budget!
+          request = LlmCostTracker::Integrations::Anthropic.request_params(args, kwargs)
+          LlmCostTracker::Integrations::Anthropic.enforce_budget!(request: request)
           started_at = LlmCostTracker::Timing.now_monotonic
           message = super
           LlmCostTracker::Integrations::Anthropic.record_message(
             message,
-            request: LlmCostTracker::Integrations::Anthropic.request_params(args, kwargs),
-            latency_ms: LlmCostTracker::Integrations::Anthropic.elapsed_ms(started_at)
+            request: request,
+            latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at)
           )
           message
         end
         def stream(*args, **kwargs)
-          request = LlmCostTracker::Integrations::Anthropic.request_params(args, kwargs)
-          LlmCostTracker::Integrations::Anthropic.enforce_budget!
-          collector = LlmCostTracker::Integrations::Anthropic.stream_collector(request)
-          stream = super
-          LlmCostTracker::Integrations::Anthropic.track_stream(stream, collector: collector)
+          LlmCostTracker::Integrations::Anthropic.wrap_stream_call(args, kwargs) { super }
         end
         def stream_raw(*args, **kwargs)
-          request = LlmCostTracker::Integrations::Anthropic.request_params(args, kwargs)
-          LlmCostTracker::Integrations::Anthropic.enforce_budget!
-          collector = LlmCostTracker::Integrations::Anthropic.stream_collector(request)
-          stream = super
-          LlmCostTracker::Integrations::Anthropic.track_stream(stream, collector: collector)
+          LlmCostTracker::Integrations::Anthropic.wrap_stream_call(args, kwargs) { super }
         end
       end
     end

data/lib/llm_cost_tracker/integrations/base.rb CHANGED Viewed

@@ -3,6 +3,7 @@
 require "active_support/core_ext/hash/indifferent_access"
 require "active_support/core_ext/string/inflections"
+require_relative "../doctor/check"
 require_relative "../logging"
 require_relative "../timing"
 require_relative "../capture/stream_collector"
@@ -11,7 +12,7 @@ require_relative "../capture/stream_tracker"
 module LlmCostTracker
   module Integrations
     module Base
-      Result = Data.define(:name, :status, :message)
+      Result = LlmCostTracker::Doctor::Check
       def active?
         LlmCostTracker.configuration.instrumented?(integration_name)
@@ -26,26 +27,28 @@ module LlmCostTracker
       end
       def status
-        name = integration_name
+        name = integration_name.to_s
         problems = version_problems + target_problems
         if problems.any?
-          return Result.new(name, :warn, "#{name} integration cannot be installed: #{problems.join('; ')}")
+          return Result.new(:warn, name, "#{name} integration cannot be installed: #{problems.join('; ')}")
         end
         installed = patch_targets.reject { |target| target.fetch(:optional) }.all? do |target|
           target.fetch(:constant_name).to_s.safe_constantize&.ancestors&.include?(target.fetch(:patch))
         end
-        return Result.new(name, :ok, "#{name} integration installed") if installed
+        return Result.new(:ok, name, "#{name} integration installed") if installed
-        Result.new(name, :warn, "#{name} integration is enabled but not installed")
+        Result.new(:warn, name, "#{name} integration is enabled but not installed")
       end
-      def elapsed_ms(started_at)
-        Timing.elapsed_ms(started_at)
-      end
+      def enforce_budget!(request:)
+        return unless active?
-      def enforce_budget!
-        LlmCostTracker::Tracker.enforce_budget! if active?
+        LlmCostTracker::Tracker.enforce_budget!(
+          provider: integration_name.to_s,
+          model: request[:model],
+          request: request
+        )
       end
       def record_safely
@@ -61,7 +64,7 @@ module LlmCostTracker
           case args.first
           when Hash then args.first
           when nil then {}
-          else args.first.respond_to?(:to_h) ? args.first.to_h : {}
+          else args.first.to_h
           end
         params.merge(kwargs).with_indifferent_access
       rescue StandardError

data/lib/llm_cost_tracker/integrations/openai.rb CHANGED Viewed

@@ -3,6 +3,8 @@
 require_relative "base"
 require_relative "../billing/line_item"
 require_relative "../parsers/openai_service_charges"
+require_relative "../providers/azure/hosts"
+require_relative "../providers/openai/model_families"
 module LlmCostTracker
   module Integrations
@@ -24,22 +26,35 @@ module LlmCostTracker
         def stream_collector(request, host: nil)
           LlmCostTracker::Capture::StreamCollector.new(
-            provider: integration_name.to_s,
+            provider: provider_for_host(host),
             model: request[:model],
             pricing_mode: stream_pricing_mode(request, host: host),
             request: request
           )
         end
+        def wrap_stream_call(args, kwargs, resource)
+          request = request_params(args, kwargs)
+          enforce_budget!(request: request)
+          host = client_host_for(resource)
+          collector = stream_collector(request, host: host)
+          stream = yield(normalize_sdk_args(args, kwargs), collector)
+          track_stream(stream, collector: collector)
+        end
         def client_host_for(resource)
           client = resource.instance_variable_get(:@client)
-          return nil unless client.respond_to?(:base_url, true)
+          return nil unless client
-          URI.parse(client.send(:base_url).to_s).host
+          URI.parse(client.base_url.to_s).host
         rescue URI::InvalidURIError
           nil
         end
+        def provider_for_host(host)
+          LlmCostTracker::Providers::Azure::Hosts.openai?(host) ? "azure_openai" : "openai"
+        end
         def minimum_version
           "0.59.0"
         end
@@ -97,8 +112,8 @@ module LlmCostTracker
             cache_read = cache_read_input_tokens(usage)
             model = object_value(response, :model) || request[:model]
             LlmCostTracker::Tracker.record(
-              capture: UsageCapture.build(
-                provider: "openai",
+              event: Event.build(
+                provider: provider_for_host(host),
                 model: model,
                 pricing_mode: LlmCostTracker::Parsers::OpenaiUsage.combined_pricing_mode(
                   host: host,
@@ -115,7 +130,7 @@ module LlmCostTracker
           end
         end
-        def record_image(response, request:, latency_ms:)
+        def record_image(response, request:, latency_ms:, host: nil)
           usage = object_value(response, :usage)
           raw_input = usage ? object_value(usage, :input_tokens).to_i : 0
           raw_output = usage ? object_value(usage, :output_tokens).to_i : 0
@@ -127,6 +142,7 @@ module LlmCostTracker
             model: request[:model],
             response: response,
             latency_ms: latency_ms,
+            host: host,
             input_tokens: text_input,
             image_input_tokens: image_input,
             output_tokens: text_output,
@@ -144,11 +160,12 @@ module LlmCostTracker
           [image_tokens, text_tokens]
         end
-        def record_transcription(response, request:, latency_ms:)
+        def record_transcription(response, request:, latency_ms:, host: nil)
           record_passthrough(
             model: request[:model],
             response: response,
             latency_ms: latency_ms,
+            host: host,
             **transcription_token_attributes(object_value(response, :usage))
           )
         end
@@ -165,24 +182,22 @@ module LlmCostTracker
           }
         end
-        def record_speech(_response, request:, latency_ms:)
+        def record_speech(_response, request:, latency_ms:, host: nil)
           record_passthrough(
             model: request[:model],
             response: nil,
             latency_ms: latency_ms,
+            host: host,
             input_tokens: 0,
             output_tokens: 0,
             service_line_items: speech_line_items(request)
           )
         end
-        CHARACTER_BILLED_TTS_MODELS = /\Atts-1(-hd)?\z/
-        private_constant :CHARACTER_BILLED_TTS_MODELS
         def speech_line_items(request)
           input = request[:input]
           return [] unless input.is_a?(String)
-          return [] unless CHARACTER_BILLED_TTS_MODELS.match?(request[:model].to_s)
+          return [] unless LlmCostTracker::Providers::Openai::ModelFamilies.character_billed_tts?(request[:model])
           [LlmCostTracker::Billing::LineItem.build(
             component_key: :text_to_speech_character,
@@ -193,23 +208,24 @@ module LlmCostTracker
           )]
         end
-        def record_moderation(response, request:, latency_ms:)
+        def record_moderation(response, request:, latency_ms:, host: nil)
           record_passthrough(
             model: object_value(response, :model) || request[:model],
             response: response,
             latency_ms: latency_ms,
+            host: host,
             input_tokens: 0,
             output_tokens: 0
           )
         end
-        def record_passthrough(model:, response:, latency_ms:, service_line_items: [], **token_attributes)
+        def record_passthrough(model:, response:, latency_ms:, host: nil, service_line_items: [], **token_attributes)
           return unless active?
           record_safely do
             LlmCostTracker::Tracker.record(
-              capture: UsageCapture.build(
-                provider: "openai",
+              event: Event.build(
+                provider: provider_for_host(host),
                 model: model,
                 token_usage: TokenUsage.build(**token_attributes),
                 usage_source: :sdk_response,
@@ -222,16 +238,41 @@ module LlmCostTracker
         end
         def service_line_items_from(response, request: nil)
+          model = object_value(response, :model) || request&.dig(:model)
           output = object_value(response, :output)
-          return [] unless output.respond_to?(:each)
+          output_items = output.respond_to?(:each) ? output.map { |item| normalize_output_item(item) }.compact : []
+          chat_search = output_items.empty? ? chat_completions_search_item(response, model: model) : nil
+          output_items << chat_search if chat_search
+          return [] if output_items.empty?
           LlmCostTracker::Parsers::OpenaiServiceCharges.line_items_from_output(
-            output.map { |item| normalize_output_item(item) },
-            request: request,
-            model: object_value(response, :model) || request&.dig(:model)
+            output_items, request: request, model: model
           )
         end
+        def chat_completions_search_item(response, model: nil)
+          choices = object_value(response, :choices)
+          return nil unless choices.respond_to?(:any?)
+          provider_field = if choices.any? { |choice| choice_used_url_citation?(choice) }
+                             LlmCostTracker::Parsers::OpenaiServiceCharges::CHAT_COMPLETIONS_ANNOTATION_PROVIDER_FIELD
+                           elsif LlmCostTracker::Providers::Openai::ModelFamilies.chat_completions_search?(model)
+                             LlmCostTracker::Parsers::OpenaiServiceCharges::CHAT_COMPLETIONS_SEARCH_MODEL_PROVIDER_FIELD
+                           end
+          return nil unless provider_field
+          { "type" => "web_search_call", "id" => object_value(response, :id),
+            "action" => { "type" => "search" }, "provider_field" => provider_field }
+        end
+        def choice_used_url_citation?(choice)
+          message = object_value(choice, :message)
+          annotations = message && object_value(message, :annotations)
+          return false unless annotations.respond_to?(:any?)
+          annotations.any? { |annotation| object_value(annotation, :type).to_s == "url_citation" }
+        end
         def normalize_output_item(item)
           return item if item.is_a?(Hash)
           return nil if item.nil?
@@ -252,9 +293,6 @@ module LlmCostTracker
           { "type" => object_value(action, :type)&.to_s }
         end
-        IMAGE_OUTPUT_MODEL_PATTERN = /\Agpt-image-/i
-        private_constant :IMAGE_OUTPUT_MODEL_PATTERN
         def token_usage(usage:, input_tokens:, output_tokens:, cache_read:, model: nil)
           audio_input = audio_input_tokens(usage)
           audio_output = audio_output_tokens(usage)
@@ -266,7 +304,7 @@ module LlmCostTracker
             image_output_details: image_output_details,
             text_output_details: text_output_details,
             audio_output: audio_output,
-            default_to_image: model.to_s.match?(IMAGE_OUTPUT_MODEL_PATTERN)
+            default_to_image: LlmCostTracker::Providers::Openai::ModelFamilies.image_output?(model)
           )
           TokenUsage.build(
@@ -319,77 +357,64 @@ module LlmCostTracker
       module ResponsesPatch
         def create(*args, **kwargs)
-          LlmCostTracker::Integrations::Openai.enforce_budget!
+          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
+          LlmCostTracker::Integrations::Openai.enforce_budget!(request: request)
           started_at = LlmCostTracker::Timing.now_monotonic
           response = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
           LlmCostTracker::Integrations::Openai.record_response(
             response,
-            request: LlmCostTracker::Integrations::Openai.request_params(args, kwargs),
-            latency_ms: LlmCostTracker::Integrations::Openai.elapsed_ms(started_at),
+            request: request,
+            latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
             host: LlmCostTracker::Integrations::Openai.client_host_for(self)
           )
           response
         end
         def stream(*args, **kwargs)
-          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
-          LlmCostTracker::Integrations::Openai.enforce_budget!
-          host = LlmCostTracker::Integrations::Openai.client_host_for(self)
-          collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
-          stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
-          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+          LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, _|
+            super(*normalized)
+          end
         end
         def stream_raw(*args, **kwargs)
-          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
-          LlmCostTracker::Integrations::Openai.enforce_budget!
-          host = LlmCostTracker::Integrations::Openai.client_host_for(self)
-          collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
-          stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
-          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+          LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, _|
+            super(*normalized)
+          end
         end
         def retrieve_streaming(response_id, *args, **kwargs)
-          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
-          LlmCostTracker::Integrations::Openai.enforce_budget!
-          host = LlmCostTracker::Integrations::Openai.client_host_for(self)
-          collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
-          collector.provider_response_id = response_id
-          stream = super(response_id, *LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
-          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+          LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, collector|
+            collector.provider_response_id = response_id
+            super(response_id, *normalized)
+          end
         end
       end
       module ChatCompletionsPatch
         def create(*args, **kwargs)
-          LlmCostTracker::Integrations::Openai.enforce_budget!
+          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
+          LlmCostTracker::Integrations::Openai.enforce_budget!(request: request)
           started_at = LlmCostTracker::Timing.now_monotonic
           response = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
           LlmCostTracker::Integrations::Openai.record_response(
             response,
-            request: LlmCostTracker::Integrations::Openai.request_params(args, kwargs),
-            latency_ms: LlmCostTracker::Integrations::Openai.elapsed_ms(started_at),
+            request: request,
+            latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
             host: LlmCostTracker::Integrations::Openai.client_host_for(self)
           )
           response
         end
         def stream(*args, **kwargs)
-          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
-          LlmCostTracker::Integrations::Openai.enforce_budget!
-          host = LlmCostTracker::Integrations::Openai.client_host_for(self)
-          collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
-          stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
-          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+          LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, _|
+            super(*normalized)
+          end
         end
         def stream_raw(*args, **kwargs)
-          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
-          LlmCostTracker::Integrations::Openai.enforce_budget!
-          host = LlmCostTracker::Integrations::Openai.client_host_for(self)
-          collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
-          stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
-          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+          LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, _|
+            super(*normalized)
+          end
         end
       end
@@ -405,13 +430,15 @@ module LlmCostTracker
         def define_wrapped_method(mod, method_name, record_method)
           mod.define_method(method_name) do |*args, **kwargs, &block|
             integration = LlmCostTracker::Integrations::Openai
-            integration.enforce_budget!
+            request = integration.request_params(args, kwargs)
+            integration.enforce_budget!(request: request)
             started_at = LlmCostTracker::Timing.now_monotonic
             response = super(*integration.normalize_sdk_args(args, kwargs), &block)
             integration.public_send(
               record_method, response,
-              request: integration.request_params(args, kwargs),
-              latency_ms: integration.elapsed_ms(started_at)
+              request: request,
+              latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
+              host: integration.client_host_for(self)
             )
             response
           end
@@ -429,7 +456,7 @@ module LlmCostTracker
         %i[generate_stream_raw edit_stream_raw].each do |method_name|
           define_method(method_name) do |*args, **kwargs|
             request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
-            LlmCostTracker::Integrations::Openai.enforce_budget!
+            LlmCostTracker::Integrations::Openai.enforce_budget!(request: request)
             host = LlmCostTracker::Integrations::Openai.client_host_for(self)
             collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
             stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
@@ -441,7 +468,7 @@ module LlmCostTracker
       module StreamingTranscriptionsPatch
         def create_streaming(*args, **kwargs)
           request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
-          LlmCostTracker::Integrations::Openai.enforce_budget!
+          LlmCostTracker::Integrations::Openai.enforce_budget!(request: request)
           host = LlmCostTracker::Integrations::Openai.client_host_for(self)
           collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
           stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))