RubyGems - llm_cost_tracker - Versions diffs - 0.7.2 → 0.8.0 - Mend

llm_cost_tracker 0.7.2 → 0.8.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (152) hide show

data/lib/llm_cost_tracker/parsers/openai_usage.rb CHANGED Viewed

@@ -1,8 +1,14 @@
 # frozen_string_literal: true
+require_relative "openai_service_charges"
 module LlmCostTracker
   module Parsers
     module OpenaiUsage
+      include OpenaiServiceCharges
+      OPENAI_DATA_RESIDENCY_HOST_PATTERN = /\A[a-z]{2,3}\.api\.openai\.com\z/
       private
       def parse_openai_usage(request_url:, request_body:, response_status:, response_body:)
@@ -27,7 +33,8 @@ module LlmCostTracker
           ),
           model: model,
           token_usage: token_usage(usage: usage, cache_read: cache_read),
-          usage_source: :response
+          usage_source: :response,
+          service_line_items: openai_service_line_items(response)
         )
       end
@@ -35,8 +42,7 @@ module LlmCostTracker
         return nil unless response_status == 200
         request = safe_json_parse(request_body)
-        model =
-          find_event_value(events) { |data| data["model"] || data.dig("response", "model") } || request["model"]
+        model = find_event_value(events) { |data| data["model"] || data.dig("response", "model") } || request["model"]
         usage = detect_stream_usage(events)
         response_id = find_event_value(events) { |data| data["id"] || data.dig("response", "id") }
         pricing_mode = pricing_mode(
@@ -44,6 +50,7 @@ module LlmCostTracker
           model: model,
           service_tier: stream_pricing_mode(events) || request["service_tier"]
         )
+        service_line_items = openai_stream_service_line_items(events)
         if usage
           cache_read = cache_read_input_tokens(usage)
@@ -54,14 +61,16 @@ module LlmCostTracker
             model: model,
             token_usage: token_usage(usage: usage, cache_read: cache_read),
             stream: true,
-            usage_source: :stream_final
+            usage_source: :stream_final,
+            service_line_items: service_line_items
           )
         else
           build_unknown_stream_usage(
             provider: provider_for(request_url),
             model: model,
             provider_response_id: response_id,
-            pricing_mode: pricing_mode
+            pricing_mode: pricing_mode,
+            service_line_items: service_line_items
           )
         end
       end
@@ -88,7 +97,7 @@ module LlmCostTracker
       def openai_regional_processing?(request_url:, model:)
         uri = parsed_uri(request_url)
-        return false unless %w[us.api.openai.com eu.api.openai.com].include?(uri&.host.to_s.downcase)
+        return false unless uri&.host.to_s.downcase.match?(OPENAI_DATA_RESIDENCY_HOST_PATTERN)
         openai_data_residency_model?(model)
       end
@@ -98,36 +107,54 @@ module LlmCostTracker
       end
       def token_usage(usage:, cache_read:)
+        audio_input = audio_input_tokens(usage)
+        audio_output = audio_output_tokens(usage)
         TokenUsage.build(
-          input_tokens: regular_input_tokens(usage: usage, cache_read: cache_read),
-          output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
-          total_tokens: total_tokens(usage: usage, cache_read: cache_read),
+          input_tokens: regular_input_tokens(usage: usage, cache_read: cache_read, audio_input: audio_input),
+          output_tokens: regular_output_tokens(usage: usage, audio_output: audio_output),
+          total_tokens: usage["total_tokens"],
           cache_read_input_tokens: cache_read,
+          audio_input_tokens: audio_input,
+          audio_output_tokens: audio_output,
           hidden_output_tokens: hidden_output_tokens(usage)
         )
       end
-      def regular_input_tokens(usage:, cache_read:)
-        [(usage["prompt_tokens"] || usage["input_tokens"]).to_i - cache_read.to_i, 0].max
+      def regular_input_tokens(usage:, cache_read:, audio_input:)
+        [(usage["prompt_tokens"] || usage["input_tokens"]).to_i - cache_read - audio_input, 0].max
+      end
+      def regular_output_tokens(usage:, audio_output:)
+        [(usage["completion_tokens"] || usage["output_tokens"]).to_i - audio_output, 0].max
       end
       def cache_read_input_tokens(usage)
-        details = usage["prompt_tokens_details"] || usage["input_tokens_details"] || {}
-        details["cached_tokens"]
+        details = input_token_details(usage)
+        details["cached_tokens"].to_i
+      end
+      def audio_input_tokens(usage)
+        details = input_token_details(usage)
+        details["audio_tokens"].to_i
       end
       def hidden_output_tokens(usage)
-        details = usage["completion_tokens_details"] || usage["output_tokens_details"] || {}
-        details["reasoning_tokens"]
+        details = output_token_details(usage)
+        details["reasoning_tokens"].to_i
       end
-      def total_tokens(usage:, cache_read:)
-        total = usage["total_tokens"]
-        return total.to_i unless total.nil?
+      def audio_output_tokens(usage)
+        details = output_token_details(usage)
+        details["audio_tokens"].to_i
+      end
+      def input_token_details(usage)
+        usage["prompt_tokens_details"] || usage["input_tokens_details"] || usage["input_token_details"] || {}
+      end
-        regular_input_tokens(usage: usage, cache_read: cache_read) +
-          cache_read.to_i +
-          (usage["completion_tokens"] || usage["output_tokens"]).to_i
+      def output_token_details(usage)
+        usage["completion_tokens_details"] || usage["output_tokens_details"] || usage["output_token_details"] || {}
       end
     end
   end

data/lib/llm_cost_tracker/parsers/sse.rb CHANGED Viewed

@@ -12,7 +12,7 @@ module LlmCostTracker
         def parse(body)
           return [] if body.blank?
-          return parse_json_array(body) if body.lstrip.start_with?("[")
+          return parse_json_array(body) if body.match?(/\A\s*\[/)
           parse_event_stream(body)
         end

data/lib/llm_cost_tracker/parsers.rb CHANGED Viewed

@@ -13,7 +13,7 @@ module LlmCostTracker
     def find_for_provider(provider)
       provider_name = provider.to_s.downcase
       BUILT_INS.find do |parser|
-        Array(parser.provider_names).map { |name| name.to_s.downcase }.include?(provider_name)
+        parser.provider_names.include?(provider_name)
       end
     end
   end

data/lib/llm_cost_tracker/prices.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "updated_at": "2026-05-01",
+    "updated_at": "2026-05-02",
     "currency": "USD",
     "unit": "1M tokens",
     "source_urls": [
@@ -15,13 +15,23 @@
     "schema_version": 1,
     "min_gem_version": "0.4.0"
   },
+  "service_charges": {
+    "anthropic": {
+      "web_search_request": 10.0,
+      "code_execution_hour": 0.05
+    },
+    "openai": {
+      "web_search_request": 10.0,
+      "file_search_call": 2.5
+    }
+  },
   "models": {
     "anthropic/claude-haiku-4-5": {
       "input": 1.0,
       "output": 5.0,
       "cache_read_input": 0.1,
       "cache_write_input": 1.25,
-      "cache_write_1h_input": 2.0,
+      "cache_write_extended_input": 2.0,
       "batch_input": 0.5,
       "batch_output": 2.5
     },
@@ -30,7 +40,7 @@
       "output": 75.0,
       "cache_read_input": 1.5,
       "cache_write_input": 18.75,
-      "cache_write_1h_input": 30.0,
+      "cache_write_extended_input": 30.0,
       "batch_input": 7.5,
       "batch_output": 37.5
     },
@@ -39,7 +49,7 @@
       "output": 75.0,
       "cache_read_input": 1.5,
       "cache_write_input": 18.75,
-      "cache_write_1h_input": 30.0,
+      "cache_write_extended_input": 30.0,
       "batch_input": 7.5,
       "batch_output": 37.5
     },
@@ -48,7 +58,7 @@
       "output": 25.0,
       "cache_read_input": 0.5,
       "cache_write_input": 6.25,
-      "cache_write_1h_input": 10.0,
+      "cache_write_extended_input": 10.0,
       "batch_input": 2.5,
       "batch_output": 12.5
     },
@@ -57,24 +67,24 @@
       "output": 25.0,
       "cache_read_input": 0.5,
       "cache_write_input": 6.25,
-      "cache_write_1h_input": 10.0,
+      "cache_write_extended_input": 10.0,
       "batch_input": 2.5,
       "batch_output": 12.5,
       "data_residency_input": 5.5,
       "data_residency_cache_write_input": 6.875,
-      "data_residency_cache_write_1h_input": 11.0,
+      "data_residency_cache_write_extended_input": 11.0,
       "data_residency_cache_read_input": 0.55,
       "data_residency_output": 27.5,
       "data_residency_batch_input": 2.75,
       "data_residency_batch_output": 13.75,
       "fast_input": 30.0,
       "fast_cache_write_input": 37.5,
-      "fast_cache_write_1h_input": 60.0,
+      "fast_cache_write_extended_input": 60.0,
       "fast_cache_read_input": 3.0,
       "fast_output": 150.0,
       "fast_data_residency_input": 33.0,
       "fast_data_residency_cache_write_input": 41.25,
-      "fast_data_residency_cache_write_1h_input": 66.0,
+      "fast_data_residency_cache_write_extended_input": 66.0,
       "fast_data_residency_cache_read_input": 3.3,
       "fast_data_residency_output": 165.0
     },
@@ -83,12 +93,12 @@
       "output": 25.0,
       "cache_read_input": 0.5,
       "cache_write_input": 6.25,
-      "cache_write_1h_input": 10.0,
+      "cache_write_extended_input": 10.0,
       "batch_input": 2.5,
       "batch_output": 12.5,
       "data_residency_input": 5.5,
       "data_residency_cache_write_input": 6.875,
-      "data_residency_cache_write_1h_input": 11.0,
+      "data_residency_cache_write_extended_input": 11.0,
       "data_residency_cache_read_input": 0.55,
       "data_residency_output": 27.5,
       "data_residency_batch_input": 2.75,
@@ -99,7 +109,7 @@
       "output": 15.0,
       "cache_read_input": 0.3,
       "cache_write_input": 3.75,
-      "cache_write_1h_input": 6.0,
+      "cache_write_extended_input": 6.0,
       "batch_input": 1.5,
       "batch_output": 7.5
     },
@@ -108,7 +118,7 @@
       "output": 15.0,
       "cache_read_input": 0.3,
       "cache_write_input": 3.75,
-      "cache_write_1h_input": 6.0,
+      "cache_write_extended_input": 6.0,
       "batch_input": 1.5,
       "batch_output": 7.5
     },
@@ -117,12 +127,12 @@
       "output": 15.0,
       "cache_read_input": 0.3,
       "cache_write_input": 3.75,
-      "cache_write_1h_input": 6.0,
+      "cache_write_extended_input": 6.0,
       "batch_input": 1.5,
       "batch_output": 7.5,
       "data_residency_input": 3.3,
       "data_residency_cache_write_input": 4.125,
-      "data_residency_cache_write_1h_input": 6.6,
+      "data_residency_cache_write_extended_input": 6.6,
       "data_residency_cache_read_input": 0.33,
       "data_residency_output": 16.5,
       "data_residency_batch_input": 1.65,
@@ -134,7 +144,9 @@
       "output": 0.4,
       "batch_input": 0.05,
       "batch_output": 0.2,
-      "batch_cache_read_input": 0.025
+      "batch_cache_read_input": 0.025,
+      "audio_input": 0.7,
+      "batch_audio_input": 0.35
     },
     "gemini/gemini-2.0-flash-lite": {
       "input": 0.075,
@@ -154,7 +166,11 @@
       "flex_cache_read_input": 0.03,
       "priority_input": 0.54,
       "priority_output": 4.5,
-      "priority_cache_read_input": 0.054
+      "priority_cache_read_input": 0.054,
+      "audio_input": 1.0,
+      "batch_audio_input": 0.5,
+      "flex_audio_input": 0.5,
+      "priority_audio_input": 1.8
     },
     "gemini/gemini-2.5-flash-lite": {
       "input": 0.1,
@@ -168,7 +184,11 @@
       "flex_cache_read_input": 0.01,
       "priority_input": 0.18,
       "priority_output": 0.72,
-      "priority_cache_read_input": 0.018
+      "priority_cache_read_input": 0.018,
+      "audio_input": 0.3,
+      "batch_audio_input": 0.15,
+      "flex_audio_input": 0.15,
+      "priority_audio_input": 0.54
     },
     "gemini/gemini-2.5-pro": {
       "input": 1.25,
@@ -309,6 +329,71 @@
       "priority_output": 1.0,
       "priority_cache_read_input": 0.125
     },
+    "openai/gpt-4o-realtime-preview": {
+      "input": 5.0,
+      "cache_read_input": 2.5,
+      "audio_input": 40.0,
+      "output": 20.0,
+      "audio_output": 80.0
+    },
+    "openai/gpt-4o-mini-realtime-preview": {
+      "input": 0.6,
+      "cache_read_input": 0.3,
+      "audio_input": 10.0,
+      "output": 2.4,
+      "audio_output": 20.0
+    },
+    "openai/gpt-realtime": {
+      "input": 4.0,
+      "cache_read_input": 0.4,
+      "audio_input": 32.0,
+      "output": 16.0,
+      "audio_output": 64.0
+    },
+    "openai/gpt-realtime-1.5": {
+      "input": 4.0,
+      "cache_read_input": 0.4,
+      "audio_input": 32.0,
+      "output": 16.0,
+      "audio_output": 64.0
+    },
+    "openai/gpt-realtime-mini": {
+      "input": 0.6,
+      "cache_read_input": 0.06,
+      "audio_input": 10.0,
+      "output": 2.4,
+      "audio_output": 20.0
+    },
+    "openai/gpt-audio-1.5": {
+      "input": 2.5,
+      "audio_input": 32.0,
+      "output": 10.0,
+      "audio_output": 64.0
+    },
+    "openai/gpt-audio-mini": {
+      "input": 0.6,
+      "audio_input": 10.0,
+      "output": 2.4,
+      "audio_output": 20.0
+    },
+    "openai/gpt-audio": {
+      "input": 2.5,
+      "audio_input": 32.0,
+      "output": 10.0,
+      "audio_output": 64.0
+    },
+    "openai/gpt-4o-audio-preview": {
+      "input": 2.5,
+      "audio_input": 40.0,
+      "output": 10.0,
+      "audio_output": 80.0
+    },
+    "openai/gpt-4o-mini-audio-preview": {
+      "input": 0.15,
+      "audio_input": 10.0,
+      "output": 0.6,
+      "audio_output": 20.0
+    },
     "openai/gpt-5": {
       "input": 1.25,
       "output": 10.0,
@@ -672,7 +757,7 @@
     "anthropic/claude-haiku-3-5": {
       "input": 0.8,
       "cache_write_input": 1.0,
-      "cache_write_1h_input": 1.6,
+      "cache_write_extended_input": 1.6,
       "cache_read_input": 0.08,
       "output": 4.0,
       "batch_input": 0.4,
@@ -681,7 +766,7 @@
     "anthropic/claude-haiku-3": {
       "input": 0.25,
       "cache_write_input": 0.3,
-      "cache_write_1h_input": 0.5,
+      "cache_write_extended_input": 0.5,
       "cache_read_input": 0.03,
       "output": 1.25,
       "batch_input": 0.125,

data/lib/llm_cost_tracker/pricing/effective_prices.rb CHANGED Viewed

@@ -1,18 +1,17 @@
 # frozen_string_literal: true
-require_relative "components"
+require_relative "../billing/components"
 module LlmCostTracker
   module Pricing
     module EffectivePrices
       class << self
         def call(usage:, prices:, pricing_mode:)
-          quantities = usage.price_quantities
           context_tier = context_tier?(usage: usage, prices: prices)
-          Pricing::COMPONENTS.to_h do |component|
-            price_key = component.price_key
-            tokens = quantities.fetch(price_key)
+          Billing::Components::TOKEN_PRICED.to_h do |component|
+            price_key = component.key
+            tokens = usage.public_send(component.token_key)
             price = if tokens.positive?
                       price_for(
                         prices: prices,
@@ -30,29 +29,67 @@ module LlmCostTracker
         private
         def price_for(prices:, key:, pricing_mode:, context_tier:)
-          mode = Pricing.normalize_mode(pricing_mode)
-          return contextual_price(prices: prices, key: key, context_tier: context_tier) unless mode
+          return contextual_price(prices: prices, key: key, context_tier: context_tier) unless pricing_mode
-          contextual_price(prices: prices, key: :"#{mode}_#{key}", context_tier: context_tier) ||
-            derived_mode_price(prices: prices, key: key, mode: mode, context_tier: context_tier)
+          orderings = mode_orderings_for(pricing_mode)
+          orderings.each do |mode|
+            direct = contextual_price(prices: prices, key: :"#{mode}_#{key}", context_tier: context_tier)
+            return direct if direct
+          end
+          return nil if %i[input output].include?(key)
+          derived_mode_price(prices: prices, key: key, modes: orderings, context_tier: context_tier)
         end
+        def mode_orderings_for(pricing_mode)
+          mode_string = pricing_mode.to_s
+          return [mode_string] unless mode_string.include?("_")
+          tokens = tokenize_mode(mode_string)
+          return [mode_string] if tokens.size <= 1
+          [mode_string, *tokens.permutation.map { |permutation| permutation.join("_") }].uniq
+        end
+        def tokenize_mode(mode_string)
+          remaining = mode_string.dup
+          tokens = []
+          loop do
+            break if remaining.empty?
+            compound = COMPOUND_MODE_TOKENS.find { |token| remaining == token || remaining.start_with?("#{token}_") }
+            if compound
+              tokens << compound
+              remaining = remaining.delete_prefix(compound).delete_prefix("_")
+            else
+              first, _, rest = remaining.partition("_")
+              tokens << first
+              remaining = rest
+            end
+          end
+          tokens
+        end
+        COMPOUND_MODE_TOKENS = %w[data_residency].freeze
+        private_constant :COMPOUND_MODE_TOKENS
         def contextual_price(prices:, key:, context_tier:)
           return prices[key] unless context_tier
           prices[:"above_context_#{key}"]
         end
-        def derived_mode_price(prices:, key:, mode:, context_tier:)
+        def derived_mode_price(prices:, key:, modes:, context_tier:)
           standard_price = contextual_price(prices: prices, key: key, context_tier: context_tier)
-          return nil unless standard_price
-          base_key = key == :output ? :output : :input
-          base_price = contextual_price(prices: prices, key: base_key, context_tier: context_tier)
-          mode_base_price = contextual_price(prices: prices, key: :"#{mode}_#{base_key}", context_tier: context_tier)
-          return nil unless base_price && mode_base_price
+          base_price = contextual_price(prices: prices, key: :input, context_tier: context_tier)
+          return nil unless standard_price && base_price
+          return nil if base_price.zero?
-          standard_price * (mode_base_price.to_f / base_price)
+          modes.each do |mode|
+            mode_base_price = contextual_price(prices: prices, key: :"#{mode}_input", context_tier: context_tier)
+            return standard_price * (mode_base_price / base_price) if mode_base_price
+          end
+          nil
         end
         def context_tier?(usage:, prices:)
@@ -62,8 +99,9 @@ module LlmCostTracker
           input_tokens = usage.input_tokens +
                          usage.cache_read_input_tokens +
                          usage.cache_write_input_tokens +
-                         usage.cache_write_1h_input_tokens
-          input_tokens > threshold.to_i
+                         usage.cache_write_extended_input_tokens +
+                         usage.audio_input_tokens
+          input_tokens > threshold
         end
       end
     end

data/lib/llm_cost_tracker/pricing/explainer.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require_relative "../token_usage"
 require_relative "effective_prices"
 module LlmCostTracker
@@ -33,7 +34,7 @@ module LlmCostTracker
     module Explainer
       class << self
-        def call(provider:, model:, token_usage:, pricing_mode: nil)
+        def call(provider:, model:, tokens:, pricing_mode: nil)
           match = Lookup.call(provider: provider, model: model)
           explanation(
@@ -41,7 +42,7 @@ module LlmCostTracker
             model: model,
             pricing_mode: pricing_mode,
             match: match,
-            usage: token_usage
+            usage: TokenUsage.build_from_tokens(tokens)
           )
         end
@@ -50,9 +51,7 @@ module LlmCostTracker
         def explanation(provider:, model:, pricing_mode:, match:, usage:)
           prices = match&.prices
           pricing_mode = Pricing.normalize_mode(pricing_mode)
-          effective = if prices && usage
-                        EffectivePrices.call(usage: usage, prices: prices, pricing_mode: pricing_mode)
-                      end
+          effective = EffectivePrices.call(usage: usage, prices: prices, pricing_mode: pricing_mode) if prices
           Explanation.new(
             provider: provider.to_s,