llm_cost_tracker 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +37 -0
  3. data/README.md +202 -11
  4. data/lib/llm_cost_tracker/budget.rb +97 -0
  5. data/lib/llm_cost_tracker/configuration.rb +37 -0
  6. data/lib/llm_cost_tracker/errors.rb +37 -0
  7. data/lib/llm_cost_tracker/event_metadata.rb +54 -0
  8. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +29 -0
  9. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +9 -0
  10. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +16 -4
  11. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +14 -1
  12. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +15 -0
  13. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +41 -0
  14. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +29 -0
  15. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +29 -0
  16. data/lib/llm_cost_tracker/llm_api_call.rb +68 -2
  17. data/lib/llm_cost_tracker/middleware/faraday.rb +50 -12
  18. data/lib/llm_cost_tracker/parsers/anthropic.rb +4 -1
  19. data/lib/llm_cost_tracker/parsers/gemini.rb +9 -2
  20. data/lib/llm_cost_tracker/parsers/openai.rb +10 -3
  21. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +44 -0
  22. data/lib/llm_cost_tracker/parsers/registry.rb +16 -7
  23. data/lib/llm_cost_tracker/price_registry.rb +69 -0
  24. data/lib/llm_cost_tracker/prices.json +51 -0
  25. data/lib/llm_cost_tracker/pricing.rb +74 -74
  26. data/lib/llm_cost_tracker/railtie.rb +3 -0
  27. data/lib/llm_cost_tracker/storage/active_record_store.rb +12 -3
  28. data/lib/llm_cost_tracker/tracker.rb +49 -54
  29. data/lib/llm_cost_tracker/unknown_pricing.rb +47 -0
  30. data/lib/llm_cost_tracker/version.rb +1 -1
  31. data/lib/llm_cost_tracker.rb +33 -5
  32. data/llm_cost_tracker.gemspec +4 -3
  33. metadata +20 -6
@@ -0,0 +1,51 @@
1
+ {
2
+ "metadata": {
3
+ "updated_at": "2026-04-18",
4
+ "currency": "USD",
5
+ "unit": "1M tokens",
6
+ "source_urls": [
7
+ "https://openai.com/api/pricing",
8
+ "https://www.anthropic.com/pricing",
9
+ "https://ai.google.dev/gemini-api/docs/pricing"
10
+ ]
11
+ },
12
+ "models": {
13
+ "gpt-5.2": { "input": 1.75, "cached_input": 0.175, "output": 14.0 },
14
+ "gpt-5.1": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
15
+ "gpt-5": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
16
+ "gpt-5-mini": { "input": 0.25, "cached_input": 0.025, "output": 2.0 },
17
+ "gpt-5-nano": { "input": 0.05, "cached_input": 0.005, "output": 0.4 },
18
+ "gpt-4.1": { "input": 2.0, "cached_input": 0.5, "output": 8.0 },
19
+ "gpt-4.1-mini": { "input": 0.4, "cached_input": 0.1, "output": 1.6 },
20
+ "gpt-4.1-nano": { "input": 0.1, "cached_input": 0.025, "output": 0.4 },
21
+ "gpt-4o-2024-05-13": { "input": 5.0, "output": 15.0 },
22
+ "gpt-4o": { "input": 2.5, "cached_input": 1.25, "output": 10.0 },
23
+ "gpt-4o-mini": { "input": 0.15, "cached_input": 0.075, "output": 0.6 },
24
+ "gpt-4-turbo": { "input": 10.0, "output": 30.0 },
25
+ "gpt-4": { "input": 30.0, "output": 60.0 },
26
+ "gpt-3.5-turbo": { "input": 0.5, "output": 1.5 },
27
+ "o1": { "input": 15.0, "cached_input": 7.5, "output": 60.0 },
28
+ "o1-mini": { "input": 1.1, "cached_input": 0.55, "output": 4.4 },
29
+ "o3": { "input": 2.0, "cached_input": 0.5, "output": 8.0 },
30
+ "o3-mini": { "input": 1.1, "cached_input": 0.55, "output": 4.4 },
31
+ "o4-mini": { "input": 1.1, "cached_input": 0.275, "output": 4.4 },
32
+ "claude-sonnet-4-6": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
33
+ "claude-opus-4-6": { "input": 5.0, "output": 25.0, "cache_read_input": 0.5, "cache_creation_input": 6.25 },
34
+ "claude-opus-4-1": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
35
+ "claude-opus-4": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
36
+ "claude-sonnet-4-5": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
37
+ "claude-sonnet-4": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
38
+ "claude-haiku-4-5": { "input": 1.0, "output": 5.0, "cache_read_input": 0.1, "cache_creation_input": 1.25 },
39
+ "claude-3-7-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
40
+ "claude-3-5-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
41
+ "claude-3-5-haiku": { "input": 0.8, "output": 4.0, "cache_read_input": 0.08, "cache_creation_input": 1.0 },
42
+ "claude-3-opus": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
43
+ "gemini-2.5-pro": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
44
+ "gemini-2.5-flash": { "input": 0.3, "cached_input": 0.03, "output": 2.5 },
45
+ "gemini-2.5-flash-lite": { "input": 0.1, "cached_input": 0.01, "output": 0.4 },
46
+ "gemini-2.0-flash": { "input": 0.1, "cached_input": 0.025, "output": 0.4 },
47
+ "gemini-2.0-flash-lite": { "input": 0.075, "output": 0.3 },
48
+ "gemini-1.5-pro": { "input": 1.25, "output": 5.0 },
49
+ "gemini-1.5-flash": { "input": 0.075, "output": 0.3 }
50
+ }
51
+ }
@@ -2,52 +2,8 @@
2
2
 
3
3
  module LlmCostTracker
4
4
  # Prices per 1M tokens in USD.
5
- # Updated: April 2026. Override via configuration.
6
5
  module Pricing
7
- PRICES = {
8
- # OpenAI
9
- "gpt-5.2" => { input: 1.75, cached_input: 0.175, output: 14.00 },
10
- "gpt-5.1" => { input: 1.25, cached_input: 0.125, output: 10.00 },
11
- "gpt-5" => { input: 1.25, cached_input: 0.125, output: 10.00 },
12
- "gpt-5-mini" => { input: 0.25, cached_input: 0.025, output: 2.00 },
13
- "gpt-5-nano" => { input: 0.05, cached_input: 0.005, output: 0.40 },
14
- "gpt-4.1" => { input: 2.00, cached_input: 0.50, output: 8.00 },
15
- "gpt-4.1-mini" => { input: 0.40, cached_input: 0.10, output: 1.60 },
16
- "gpt-4.1-nano" => { input: 0.10, cached_input: 0.025, output: 0.40 },
17
- "gpt-4o-2024-05-13" => { input: 5.00, output: 15.00 },
18
- "gpt-4o" => { input: 2.50, cached_input: 1.25, output: 10.00 },
19
- "gpt-4o-mini" => { input: 0.15, cached_input: 0.075, output: 0.60 },
20
- "gpt-4-turbo" => { input: 10.00, output: 30.00 },
21
- "gpt-4" => { input: 30.00, output: 60.00 },
22
- "gpt-3.5-turbo" => { input: 0.50, output: 1.50 },
23
- "o1" => { input: 15.00, cached_input: 7.50, output: 60.00 },
24
- "o1-mini" => { input: 1.10, cached_input: 0.55, output: 4.40 },
25
- "o3" => { input: 2.00, cached_input: 0.50, output: 8.00 },
26
- "o3-mini" => { input: 1.10, cached_input: 0.55, output: 4.40 },
27
- "o4-mini" => { input: 1.10, cached_input: 0.275, output: 4.40 },
28
-
29
- # Anthropic
30
- "claude-sonnet-4-6" => { input: 3.00, output: 15.00, cache_read_input: 0.30, cache_creation_input: 3.75 },
31
- "claude-opus-4-6" => { input: 5.00, output: 25.00, cache_read_input: 0.50, cache_creation_input: 6.25 },
32
- "claude-opus-4-1" => { input: 15.00, output: 75.00, cache_read_input: 1.50, cache_creation_input: 18.75 },
33
- "claude-opus-4" => { input: 15.00, output: 75.00, cache_read_input: 1.50, cache_creation_input: 18.75 },
34
- "claude-sonnet-4-5" => { input: 3.00, output: 15.00, cache_read_input: 0.30, cache_creation_input: 3.75 },
35
- "claude-sonnet-4" => { input: 3.00, output: 15.00, cache_read_input: 0.30, cache_creation_input: 3.75 },
36
- "claude-haiku-4-5" => { input: 1.00, output: 5.00, cache_read_input: 0.10, cache_creation_input: 1.25 },
37
- "claude-3-7-sonnet" => { input: 3.00, output: 15.00, cache_read_input: 0.30, cache_creation_input: 3.75 },
38
- "claude-3-5-sonnet" => { input: 3.00, output: 15.00, cache_read_input: 0.30, cache_creation_input: 3.75 },
39
- "claude-3-5-haiku" => { input: 0.80, output: 4.00, cache_read_input: 0.08, cache_creation_input: 1.00 },
40
- "claude-3-opus" => { input: 15.00, output: 75.00, cache_read_input: 1.50, cache_creation_input: 18.75 },
41
-
42
- # Google Gemini
43
- "gemini-2.5-pro" => { input: 1.25, cached_input: 0.125, output: 10.00 },
44
- "gemini-2.5-flash" => { input: 0.30, cached_input: 0.03, output: 2.50 },
45
- "gemini-2.5-flash-lite" => { input: 0.10, cached_input: 0.01, output: 0.40 },
46
- "gemini-2.0-flash" => { input: 0.10, cached_input: 0.025, output: 0.40 },
47
- "gemini-2.0-flash-lite" => { input: 0.075, output: 0.30 },
48
- "gemini-1.5-pro" => { input: 1.25, output: 5.00 },
49
- "gemini-1.5-flash" => { input: 0.075, output: 0.30 }
50
- }.freeze
6
+ PRICES = PriceRegistry.builtin_prices
51
7
 
52
8
  class << self
53
9
  def cost_for(model:, input_tokens:, output_tokens:, cached_input_tokens: 0,
@@ -55,54 +11,98 @@ module LlmCostTracker
55
11
  prices = lookup(model)
56
12
  return nil unless prices
57
13
 
58
- cached_input_tokens = cached_input_tokens.to_i
59
- cache_read_input_tokens = cache_read_input_tokens.to_i
60
- cache_creation_input_tokens = cache_creation_input_tokens.to_i
61
- uncached_input_tokens = [input_tokens.to_i - cached_input_tokens, 0].max
62
-
63
- input_cost = (uncached_input_tokens.to_f / 1_000_000) * prices[:input]
64
- cached_input_cost = (cached_input_tokens.to_f / 1_000_000) *
65
- (prices[:cached_input] || prices[:input])
66
- cache_read_input_cost = (cache_read_input_tokens.to_f / 1_000_000) *
67
- (prices[:cache_read_input] || prices[:cached_input] || prices[:input])
68
- cache_creation_input_cost = (cache_creation_input_tokens.to_f / 1_000_000) *
69
- (prices[:cache_creation_input] || prices[:input])
70
- output_cost = (output_tokens.to_f / 1_000_000) * prices[:output]
71
- total_cost = input_cost + cached_input_cost + cache_read_input_cost +
72
- cache_creation_input_cost + output_cost
14
+ token_counts = normalized_token_counts(input_tokens, output_tokens, cached_input_tokens,
15
+ cache_read_input_tokens, cache_creation_input_tokens)
16
+ costs = calculate_costs(token_counts, prices)
73
17
 
74
18
  {
75
- input_cost: input_cost.round(8),
76
- cached_input_cost: cached_input_cost.round(8),
77
- cache_read_input_cost: cache_read_input_cost.round(8),
78
- cache_creation_input_cost: cache_creation_input_cost.round(8),
79
- output_cost: output_cost.round(8),
80
- total_cost: total_cost.round(8),
19
+ input_cost: costs[:input].round(8),
20
+ cached_input_cost: costs[:cached_input].round(8),
21
+ cache_read_input_cost: costs[:cache_read_input].round(8),
22
+ cache_creation_input_cost: costs[:cache_creation_input].round(8),
23
+ output_cost: costs[:output].round(8),
24
+ total_cost: costs.values.sum.round(8),
81
25
  currency: "USD"
82
26
  }
83
27
  end
84
28
 
85
29
  def lookup(model)
86
- overrides = LlmCostTracker.configuration.pricing_overrides
87
- overrides[model] || PRICES[model] || fuzzy_match(model)
30
+ table = prices
31
+ model_name = model.to_s
32
+ normalized_model = normalize_model_name(model_name)
33
+
34
+ table[model_name] || table[normalized_model] || fuzzy_match(model_name, normalized_model, table)
88
35
  end
89
36
 
90
37
  def models
91
- PRICES.keys | LlmCostTracker.configuration.pricing_overrides.keys
38
+ prices.keys
39
+ end
40
+
41
+ def metadata
42
+ PriceRegistry.metadata
43
+ end
44
+
45
+ def prices
46
+ PRICES
47
+ .merge(PriceRegistry.file_prices(LlmCostTracker.configuration.prices_file))
48
+ .merge(PriceRegistry.normalize_price_table(LlmCostTracker.configuration.pricing_overrides))
92
49
  end
93
50
 
94
51
  private
95
52
 
96
- # Try to match model names like "gpt-4o-2024-08-06" to "gpt-4o"
97
- def fuzzy_match(model)
98
- return nil unless model
53
+ def normalized_token_counts(input_tokens, output_tokens, cached_input_tokens,
54
+ cache_read_input_tokens, cache_creation_input_tokens)
55
+ cached_input_tokens = cached_input_tokens.to_i
56
+
57
+ {
58
+ input: [input_tokens.to_i - cached_input_tokens, 0].max,
59
+ cached_input: cached_input_tokens,
60
+ cache_read_input: cache_read_input_tokens.to_i,
61
+ cache_creation_input: cache_creation_input_tokens.to_i,
62
+ output: output_tokens.to_i
63
+ }
64
+ end
65
+
66
+ def calculate_costs(token_counts, prices)
67
+ {
68
+ input: token_cost(token_counts[:input], prices[:input]),
69
+ cached_input: token_cost(token_counts[:cached_input], prices[:cached_input] || prices[:input]),
70
+ cache_read_input: token_cost(
71
+ token_counts[:cache_read_input],
72
+ prices[:cache_read_input] || prices[:cached_input] || prices[:input]
73
+ ),
74
+ cache_creation_input: token_cost(
75
+ token_counts[:cache_creation_input],
76
+ prices[:cache_creation_input] || prices[:input]
77
+ ),
78
+ output: token_cost(token_counts[:output], prices[:output])
79
+ }
80
+ end
81
+
82
+ def token_cost(tokens, per_million_price)
83
+ (tokens.to_f / 1_000_000) * per_million_price
84
+ end
99
85
 
100
- PRICES.sort_by { |key, _value| -key.length }.each do |key, value|
101
- return value if model.start_with?(key)
86
+ def normalize_model_name(model)
87
+ model.to_s.split("/").last
88
+ end
89
+
90
+ # Try to match model names like "gpt-4o-2024-08-06" to "gpt-4o".
91
+ def fuzzy_match(model, normalized_model, table)
92
+ sorted_price_keys(table).each do |key|
93
+ return table[key] if model.start_with?(key) || normalized_model.start_with?(key)
102
94
  end
103
95
 
104
96
  nil
105
97
  end
98
+
99
+ def sorted_price_keys(table)
100
+ cache_key = table.keys
101
+ return @sorted_price_keys if @sorted_price_keys_cache_key == cache_key
102
+
103
+ @sorted_price_keys_cache_key = cache_key
104
+ @sorted_price_keys = cache_key.sort_by { |key| -key.length }
105
+ end
106
106
  end
107
107
  end
108
108
  end
@@ -3,7 +3,10 @@
3
3
  module LlmCostTracker
4
4
  class Railtie < Rails::Railtie
5
5
  generators do
6
+ require_relative "generators/llm_cost_tracker/add_latency_ms_generator"
6
7
  require_relative "generators/llm_cost_tracker/install_generator"
8
+ require_relative "generators/llm_cost_tracker/upgrade_cost_precision_generator"
9
+ require_relative "generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator"
7
10
  end
8
11
 
9
12
  initializer "llm_cost_tracker.configure" do
@@ -5,7 +5,9 @@ module LlmCostTracker
5
5
  class ActiveRecordStore
6
6
  class << self
7
7
  def save(event)
8
- model_class.create!(
8
+ tags = stringify_tags(event[:tags] || {})
9
+
10
+ attributes = {
9
11
  provider: event[:provider],
10
12
  model: event[:model],
11
13
  input_tokens: event[:input_tokens],
@@ -14,9 +16,12 @@ module LlmCostTracker
14
16
  input_cost: event.dig(:cost, :input_cost),
15
17
  output_cost: event.dig(:cost, :output_cost),
16
18
  total_cost: event.dig(:cost, :total_cost),
17
- tags: stringify_tags(event[:tags]).to_json,
19
+ tags: tags_for_storage(tags),
18
20
  tracked_at: event[:tracked_at]
19
- )
21
+ }
22
+ attributes[:latency_ms] = event[:latency_ms] if model_class.latency_column?
23
+
24
+ model_class.create!(attributes)
20
25
  end
21
26
 
22
27
  def monthly_total(time: Time.now.utc)
@@ -38,6 +43,10 @@ module LlmCostTracker
38
43
  tags.transform_keys(&:to_s).transform_values { |value| stringify_tag_value(value) }
39
44
  end
40
45
 
46
+ def tags_for_storage(tags)
47
+ model_class.tags_json_column? ? tags : tags.to_json
48
+ end
49
+
41
50
  def stringify_tag_value(value)
42
51
  return value.transform_values { |nested| stringify_tag_value(nested) } if value.is_a?(Hash)
43
52
 
@@ -5,8 +5,12 @@ module LlmCostTracker
5
5
  EVENT_NAME = "llm_request.llm_cost_tracker"
6
6
 
7
7
  class << self
8
- def record(provider:, model:, input_tokens:, output_tokens:, metadata: {})
9
- usage = usage_data(input_tokens, output_tokens, metadata)
8
+ def enforce_budget!
9
+ Budget.enforce!
10
+ end
11
+
12
+ def record(provider:, model:, input_tokens:, output_tokens:, metadata: {}, latency_ms: nil)
13
+ usage = EventMetadata.usage_data(input_tokens, output_tokens, metadata)
10
14
 
11
15
  cost_data = Pricing.cost_for(
12
16
  model: model,
@@ -17,6 +21,8 @@ module LlmCostTracker
17
21
  cache_creation_input_tokens: usage[:cache_creation_input_tokens]
18
22
  )
19
23
 
24
+ UnknownPricing.handle!(model) unless cost_data
25
+
20
26
  event = {
21
27
  provider: provider,
22
28
  model: model,
@@ -24,7 +30,8 @@ module LlmCostTracker
24
30
  output_tokens: usage[:output_tokens],
25
31
  total_tokens: usage[:total_tokens],
26
32
  cost: cost_data,
27
- tags: LlmCostTracker.configuration.default_tags.merge(metadata),
33
+ tags: LlmCostTracker.configuration.default_tags.merge(EventMetadata.tags(metadata)),
34
+ latency_ms: normalized_latency_ms(latency_ms),
28
35
  tracked_at: Time.now.utc
29
36
  }
30
37
 
@@ -32,10 +39,8 @@ module LlmCostTracker
32
39
  ActiveSupport::Notifications.instrument(EVENT_NAME, event)
33
40
 
34
41
  # Store based on backend
35
- store(event)
36
-
37
- # Budget check
38
- check_budget(event)
42
+ stored = store(event)
43
+ Budget.check!(event) unless stored == false
39
44
 
40
45
  event
41
46
  end
@@ -53,6 +58,13 @@ module LlmCostTracker
53
58
  when :custom
54
59
  config.custom_storage&.call(event)
55
60
  end
61
+
62
+ true
63
+ rescue BudgetExceededError, UnknownPricingError
64
+ raise
65
+ rescue StandardError => e
66
+ handle_storage_error(e)
67
+ false
56
68
  end
57
69
 
58
70
  def log_event(event)
@@ -61,6 +73,7 @@ module LlmCostTracker
61
73
  message = "[LlmCostTracker] #{event[:provider]}/#{event[:model]} " \
62
74
  "tokens=#{event[:input_tokens]}+#{event[:output_tokens]} " \
63
75
  "cost=#{cost_str}"
76
+ message += " latency=#{event[:latency_ms]}ms" if event[:latency_ms]
64
77
  message += " tags=#{event[:tags]}" unless event[:tags].empty?
65
78
 
66
79
  case LlmCostTracker.configuration.log_level
@@ -76,6 +89,16 @@ module LlmCostTracker
76
89
  warn(message) unless defined?(Rails)
77
90
  end
78
91
 
92
+ def log_warning(message)
93
+ message = "[LlmCostTracker] #{message}"
94
+
95
+ if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
96
+ Rails.logger.warn(message)
97
+ else
98
+ warn message
99
+ end
100
+ end
101
+
79
102
  def store_active_record(event)
80
103
  require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
81
104
  require_relative "storage/active_record_store" unless defined?(LlmCostTracker::Storage::ActiveRecordStore)
@@ -85,59 +108,31 @@ module LlmCostTracker
85
108
  raise Error, "ActiveRecord storage requires the active_record gem: #{e.message}"
86
109
  end
87
110
 
88
- def check_budget(event)
89
- config = LlmCostTracker.configuration
90
- return unless config.monthly_budget && config.on_budget_exceeded
91
- return unless event[:cost]
92
-
93
- monthly_total = calculate_monthly_total(event[:cost][:total_cost])
94
- return unless monthly_total > config.monthly_budget
95
-
96
- config.on_budget_exceeded.call(
97
- monthly_total: monthly_total,
98
- budget: config.monthly_budget,
99
- last_event: event
100
- )
101
- end
102
-
103
- def calculate_monthly_total(latest_cost)
104
- # For :active_record backend, query the DB
105
- if LlmCostTracker.configuration.active_record? &&
106
- defined?(LlmCostTracker::Storage::ActiveRecordStore)
107
- LlmCostTracker::Storage::ActiveRecordStore.monthly_total
108
- else
109
- # For other backends, we can only report the latest cost
110
- latest_cost
111
+ def handle_storage_error(error)
112
+ case storage_error_behavior
113
+ when :ignore
114
+ nil
115
+ when :warn
116
+ log_warning("Storage failed; tracking event was not persisted: #{error.class}: #{error.message}")
117
+ when :raise
118
+ storage_error = StorageError.new(error)
119
+ raise storage_error
111
120
  end
112
121
  end
113
122
 
114
- def usage_data(input_tokens, output_tokens, metadata)
115
- cache_read_input_tokens = integer_metadata(metadata, :cache_read_input_tokens, :cache_read_tokens)
116
- cache_creation_input_tokens = integer_metadata(
117
- metadata,
118
- :cache_creation_input_tokens,
119
- :cache_creation_tokens
120
- )
121
- cached_input_tokens = integer_metadata(metadata, :cached_input_tokens)
122
-
123
- {
124
- input_tokens: input_tokens.to_i,
125
- output_tokens: output_tokens.to_i,
126
- cached_input_tokens: cached_input_tokens,
127
- cache_read_input_tokens: cache_read_input_tokens,
128
- cache_creation_input_tokens: cache_creation_input_tokens,
129
- total_tokens: input_tokens.to_i + output_tokens.to_i +
130
- cache_read_input_tokens + cache_creation_input_tokens
131
- }
123
+ def storage_error_behavior
124
+ behavior = (LlmCostTracker.configuration.storage_error_behavior || :warn).to_sym
125
+ return behavior if Configuration::STORAGE_ERROR_BEHAVIORS.include?(behavior)
126
+
127
+ raise Error,
128
+ "Unknown storage_error_behavior: #{behavior.inspect}. " \
129
+ "Use one of: #{Configuration::STORAGE_ERROR_BEHAVIORS.join(', ')}"
132
130
  end
133
131
 
134
- def integer_metadata(metadata, *keys)
135
- keys.each do |key|
136
- value = metadata[key] || metadata[key.to_s]
137
- return value.to_i unless value.nil?
138
- end
132
+ def normalized_latency_ms(latency_ms)
133
+ return nil if latency_ms.nil?
139
134
 
140
- 0
135
+ [latency_ms.to_i, 0].max
141
136
  end
142
137
  end
143
138
  end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LlmCostTracker
4
+ class UnknownPricing
5
+ class << self
6
+ def handle!(model)
7
+ model = normalized_model_name(model)
8
+
9
+ case behavior
10
+ when :ignore
11
+ nil
12
+ when :warn
13
+ warn_missing(model)
14
+ when :raise
15
+ raise UnknownPricingError.new(model: model)
16
+ end
17
+ end
18
+
19
+ private
20
+
21
+ def normalized_model_name(model)
22
+ model.to_s.empty? ? "unknown" : model.to_s
23
+ end
24
+
25
+ def warn_missing(model)
26
+ message = "[LlmCostTracker] No pricing configured for model #{model.inspect}. " \
27
+ "Cost and budget enforcement will be skipped for this event. " \
28
+ "Add a pricing_overrides entry or set unknown_pricing_behavior."
29
+
30
+ if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
31
+ Rails.logger.warn(message)
32
+ else
33
+ Kernel.warn(message)
34
+ end
35
+ end
36
+
37
+ def behavior
38
+ behavior = (LlmCostTracker.configuration.unknown_pricing_behavior || :warn).to_sym
39
+ return behavior if Configuration::UNKNOWN_PRICING_BEHAVIORS.include?(behavior)
40
+
41
+ raise Error,
42
+ "Unknown unknown_pricing_behavior: #{behavior.inspect}. " \
43
+ "Use one of: #{Configuration::UNKNOWN_PRICING_BEHAVIORS.join(', ')}"
44
+ end
45
+ end
46
+ end
47
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module LlmCostTracker
4
- VERSION = "0.1.1"
4
+ VERSION = "0.1.2"
5
5
  end
@@ -5,31 +5,39 @@ require "active_support/notifications"
5
5
 
6
6
  require_relative "llm_cost_tracker/version"
7
7
  require_relative "llm_cost_tracker/configuration"
8
+ require_relative "llm_cost_tracker/errors"
9
+ require_relative "llm_cost_tracker/price_registry"
8
10
  require_relative "llm_cost_tracker/pricing"
9
11
  require_relative "llm_cost_tracker/parsers/base"
10
12
  require_relative "llm_cost_tracker/parsers/openai"
13
+ require_relative "llm_cost_tracker/parsers/openai_compatible"
11
14
  require_relative "llm_cost_tracker/parsers/anthropic"
12
15
  require_relative "llm_cost_tracker/parsers/gemini"
13
16
  require_relative "llm_cost_tracker/parsers/registry"
14
17
  require_relative "llm_cost_tracker/middleware/faraday"
18
+ require_relative "llm_cost_tracker/budget"
19
+ require_relative "llm_cost_tracker/unknown_pricing"
20
+ require_relative "llm_cost_tracker/event_metadata"
15
21
  require_relative "llm_cost_tracker/tracker"
16
22
 
17
23
  module LlmCostTracker
18
- class Error < StandardError; end
19
-
20
24
  class << self
25
+ CONFIGURATION_MUTEX = Mutex.new
26
+
21
27
  attr_writer :configuration
22
28
 
23
29
  def configuration
24
- @configuration ||= Configuration.new
30
+ @configuration || CONFIGURATION_MUTEX.synchronize { @configuration ||= Configuration.new }
25
31
  end
26
32
 
27
33
  def configure
28
34
  yield(configuration)
35
+ configuration.normalize_openai_compatible_providers!
36
+ warn_for_configuration!
29
37
  end
30
38
 
31
39
  def reset_configuration!
32
- @configuration = Configuration.new
40
+ CONFIGURATION_MUTEX.synchronize { @configuration = Configuration.new }
33
41
  end
34
42
 
35
43
  # Manual tracking for non-Faraday clients
@@ -42,15 +50,35 @@ module LlmCostTracker
42
50
  # feature: "chat",
43
51
  # user_id: current_user.id
44
52
  # )
45
- def track(provider:, model:, input_tokens:, output_tokens:, **metadata)
53
+ def track(provider:, model:, input_tokens:, output_tokens:, latency_ms: nil, **metadata)
46
54
  Tracker.record(
47
55
  provider: provider.to_s,
48
56
  model: model,
49
57
  input_tokens: input_tokens,
50
58
  output_tokens: output_tokens,
59
+ latency_ms: latency_ms,
51
60
  metadata: metadata
52
61
  )
53
62
  end
63
+
64
+ private
65
+
66
+ def warn_for_configuration!
67
+ return unless (configuration.budget_exceeded_behavior || :notify).to_sym == :block_requests
68
+ return if configuration.active_record?
69
+
70
+ log_warning(":block_requests requires storage_backend = :active_record; preflight blocking will be skipped.")
71
+ end
72
+
73
+ def log_warning(message)
74
+ message = "[LlmCostTracker] #{message}"
75
+
76
+ if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
77
+ Rails.logger.warn(message)
78
+ else
79
+ warn message
80
+ end
81
+ end
54
82
  end
55
83
  end
56
84
 
@@ -8,10 +8,11 @@ Gem::Specification.new do |spec|
8
8
  spec.authors = ["Sergii Khomenko"]
9
9
  spec.email = ["sergey@mm.st"]
10
10
 
11
- spec.summary = "Self-hosted LLM API cost tracking for Ruby and Rails"
12
- spec.description = "Tracks token usage and estimated costs for OpenAI, Anthropic, and Google Gemini calls. " \
11
+ spec.summary = "Self-hosted LLM API cost guardrails for Ruby and Rails"
12
+ spec.description = "Tracks token usage and estimated costs for OpenAI, Anthropic, Google Gemini, " \
13
+ "OpenRouter, DeepSeek, and OpenAI-compatible calls. " \
13
14
  "Works as Faraday middleware for Ruby clients, with ActiveRecord storage, " \
14
- "per-user/per-feature attribution, and budget alerts."
15
+ "per-user/per-feature attribution, budget alerts, and budget enforcement."
15
16
  spec.homepage = "https://github.com/sergey-homenko/llm_cost_tracker"
16
17
  spec.license = "MIT"
17
18