llm_cost_tracker 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/README.md +34 -14
  4. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +101 -19
  5. data/app/views/llm_cost_tracker/data_quality/index.html.erb +65 -0
  6. data/lib/llm_cost_tracker/budget.rb +85 -21
  7. data/lib/llm_cost_tracker/configuration.rb +4 -0
  8. data/lib/llm_cost_tracker/cost.rb +1 -2
  9. data/lib/llm_cost_tracker/errors.rb +22 -3
  10. data/lib/llm_cost_tracker/event.rb +4 -0
  11. data/lib/llm_cost_tracker/event_metadata.rb +21 -15
  12. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_period_totals_generator.rb +29 -0
  13. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_usage_breakdown_generator.rb +29 -0
  14. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +66 -0
  15. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_usage_breakdown_to_llm_api_calls.rb.erb +29 -0
  16. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +15 -0
  17. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +3 -1
  18. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +11 -3
  19. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +1 -0
  20. data/lib/llm_cost_tracker/middleware/faraday.rb +27 -9
  21. data/lib/llm_cost_tracker/parsed_usage.rb +16 -7
  22. data/lib/llm_cost_tracker/parsers/anthropic.rb +7 -6
  23. data/lib/llm_cost_tracker/parsers/base.rb +2 -1
  24. data/lib/llm_cost_tracker/parsers/gemini.rb +5 -2
  25. data/lib/llm_cost_tracker/parsers/openai_usage.rb +18 -5
  26. data/lib/llm_cost_tracker/period_total.rb +9 -0
  27. data/lib/llm_cost_tracker/price_registry.rb +14 -4
  28. data/lib/llm_cost_tracker/price_sync/merger.rb +1 -1
  29. data/lib/llm_cost_tracker/price_sync/raw_price.rb +3 -5
  30. data/lib/llm_cost_tracker/price_sync/sources/litellm.rb +2 -3
  31. data/lib/llm_cost_tracker/price_sync/sources/open_router.rb +2 -3
  32. data/lib/llm_cost_tracker/prices.json +30 -30
  33. data/lib/llm_cost_tracker/pricing.rb +44 -32
  34. data/lib/llm_cost_tracker/railtie.rb +2 -0
  35. data/lib/llm_cost_tracker/storage/active_record_rollups.rb +122 -0
  36. data/lib/llm_cost_tracker/storage/active_record_store.rb +38 -13
  37. data/lib/llm_cost_tracker/stream_collector.rb +5 -3
  38. data/lib/llm_cost_tracker/tags_column.rb +19 -0
  39. data/lib/llm_cost_tracker/tracker.rb +58 -32
  40. data/lib/llm_cost_tracker/unknown_pricing.rb +14 -0
  41. data/lib/llm_cost_tracker/usage_breakdown.rb +30 -0
  42. data/lib/llm_cost_tracker/version.rb +1 -1
  43. data/lib/llm_cost_tracker.rb +12 -3
  44. metadata +10 -4
  45. data/llm_cost_tracker.gemspec +0 -50
@@ -65,9 +65,8 @@ module LlmCostTracker
65
65
  provider: provider,
66
66
  input: price_per_million(entry["input_cost_per_token"]),
67
67
  output: price_per_million(entry["output_cost_per_token"]),
68
- cached_input: provider == "anthropic" ? nil : cache_read,
69
- cache_read_input: provider == "anthropic" ? cache_read : nil,
70
- cache_creation_input: provider == "anthropic" ? cache_write : nil,
68
+ cache_read_input: cache_read,
69
+ cache_write_input: cache_write,
71
70
  source: name,
72
71
  source_version: response_version(response),
73
72
  fetched_at: response.fetched_at
@@ -68,9 +68,8 @@ module LlmCostTracker
68
68
  provider: provider,
69
69
  input: price_per_million(pricing["prompt"]),
70
70
  output: price_per_million(pricing["completion"]),
71
- cached_input: provider == "anthropic" ? nil : cache_read,
72
- cache_read_input: provider == "anthropic" ? cache_read : nil,
73
- cache_creation_input: provider == "anthropic" ? cache_write : nil,
71
+ cache_read_input: cache_read,
72
+ cache_write_input: cache_write,
74
73
  source: name,
75
74
  source_version: response_version(response),
76
75
  fetched_at: response.fetched_at
@@ -10,40 +10,40 @@
10
10
  ]
11
11
  },
12
12
  "models": {
13
- "gpt-5.2": { "input": 1.75, "cached_input": 0.175, "output": 14.0 },
14
- "gpt-5.1": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
15
- "gpt-5": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
16
- "gpt-5-mini": { "input": 0.25, "cached_input": 0.025, "output": 2.0 },
17
- "gpt-5-nano": { "input": 0.05, "cached_input": 0.005, "output": 0.4 },
18
- "gpt-4.1": { "input": 2.0, "cached_input": 0.5, "output": 8.0 },
19
- "gpt-4.1-mini": { "input": 0.4, "cached_input": 0.1, "output": 1.6 },
20
- "gpt-4.1-nano": { "input": 0.1, "cached_input": 0.025, "output": 0.4 },
13
+ "gpt-5.2": { "input": 1.75, "cache_read_input": 0.175, "output": 14.0 },
14
+ "gpt-5.1": { "input": 1.25, "cache_read_input": 0.125, "output": 10.0 },
15
+ "gpt-5": { "input": 1.25, "cache_read_input": 0.125, "output": 10.0 },
16
+ "gpt-5-mini": { "input": 0.25, "cache_read_input": 0.025, "output": 2.0 },
17
+ "gpt-5-nano": { "input": 0.05, "cache_read_input": 0.005, "output": 0.4 },
18
+ "gpt-4.1": { "input": 2.0, "cache_read_input": 0.5, "output": 8.0 },
19
+ "gpt-4.1-mini": { "input": 0.4, "cache_read_input": 0.1, "output": 1.6 },
20
+ "gpt-4.1-nano": { "input": 0.1, "cache_read_input": 0.025, "output": 0.4 },
21
21
  "gpt-4o-2024-05-13": { "input": 5.0, "output": 15.0 },
22
- "gpt-4o": { "input": 2.5, "cached_input": 1.25, "output": 10.0 },
23
- "gpt-4o-mini": { "input": 0.15, "cached_input": 0.075, "output": 0.6 },
22
+ "gpt-4o": { "input": 2.5, "cache_read_input": 1.25, "output": 10.0 },
23
+ "gpt-4o-mini": { "input": 0.15, "cache_read_input": 0.075, "output": 0.6 },
24
24
  "gpt-4-turbo": { "input": 10.0, "output": 30.0 },
25
25
  "gpt-4": { "input": 30.0, "output": 60.0 },
26
26
  "gpt-3.5-turbo": { "input": 0.5, "output": 1.5 },
27
- "o1": { "input": 15.0, "cached_input": 7.5, "output": 60.0 },
28
- "o1-mini": { "input": 1.1, "cached_input": 0.55, "output": 4.4 },
29
- "o3": { "input": 2.0, "cached_input": 0.5, "output": 8.0 },
30
- "o3-mini": { "input": 1.1, "cached_input": 0.55, "output": 4.4 },
31
- "o4-mini": { "input": 1.1, "cached_input": 0.275, "output": 4.4 },
32
- "claude-sonnet-4-6": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
33
- "claude-opus-4-6": { "input": 5.0, "output": 25.0, "cache_read_input": 0.5, "cache_creation_input": 6.25 },
34
- "claude-opus-4-1": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
35
- "claude-opus-4": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
36
- "claude-sonnet-4-5": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
37
- "claude-sonnet-4": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
38
- "claude-haiku-4-5": { "input": 1.0, "output": 5.0, "cache_read_input": 0.1, "cache_creation_input": 1.25 },
39
- "claude-3-7-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
40
- "claude-3-5-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
41
- "claude-3-5-haiku": { "input": 0.8, "output": 4.0, "cache_read_input": 0.08, "cache_creation_input": 1.0 },
42
- "claude-3-opus": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
43
- "gemini-2.5-pro": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
44
- "gemini-2.5-flash": { "input": 0.3, "cached_input": 0.03, "output": 2.5 },
45
- "gemini-2.5-flash-lite": { "input": 0.1, "cached_input": 0.01, "output": 0.4 },
46
- "gemini-2.0-flash": { "input": 0.1, "cached_input": 0.025, "output": 0.4 },
27
+ "o1": { "input": 15.0, "cache_read_input": 7.5, "output": 60.0 },
28
+ "o1-mini": { "input": 1.1, "cache_read_input": 0.55, "output": 4.4 },
29
+ "o3": { "input": 2.0, "cache_read_input": 0.5, "output": 8.0 },
30
+ "o3-mini": { "input": 1.1, "cache_read_input": 0.55, "output": 4.4 },
31
+ "o4-mini": { "input": 1.1, "cache_read_input": 0.275, "output": 4.4 },
32
+ "claude-sonnet-4-6": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_write_input": 3.75 },
33
+ "claude-opus-4-6": { "input": 5.0, "output": 25.0, "cache_read_input": 0.5, "cache_write_input": 6.25 },
34
+ "claude-opus-4-1": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_write_input": 18.75 },
35
+ "claude-opus-4": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_write_input": 18.75 },
36
+ "claude-sonnet-4-5": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_write_input": 3.75 },
37
+ "claude-sonnet-4": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_write_input": 3.75 },
38
+ "claude-haiku-4-5": { "input": 1.0, "output": 5.0, "cache_read_input": 0.1, "cache_write_input": 1.25 },
39
+ "claude-3-7-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_write_input": 3.75 },
40
+ "claude-3-5-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_write_input": 3.75 },
41
+ "claude-3-5-haiku": { "input": 0.8, "output": 4.0, "cache_read_input": 0.08, "cache_write_input": 1.0 },
42
+ "claude-3-opus": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_write_input": 18.75 },
43
+ "gemini-2.5-pro": { "input": 1.25, "cache_read_input": 0.125, "output": 10.0 },
44
+ "gemini-2.5-flash": { "input": 0.3, "cache_read_input": 0.03, "output": 2.5 },
45
+ "gemini-2.5-flash-lite": { "input": 0.1, "cache_read_input": 0.01, "output": 0.4 },
46
+ "gemini-2.0-flash": { "input": 0.1, "cache_read_input": 0.025, "output": 0.4 },
47
47
  "gemini-2.0-flash-lite": { "input": 0.075, "output": 0.3 },
48
48
  "gemini-1.5-pro": { "input": 1.25, "output": 5.0 },
49
49
  "gemini-1.5-flash": { "input": 0.075, "output": 0.3 }
@@ -8,32 +8,40 @@ module LlmCostTracker
8
8
  MUTEX = Monitor.new
9
9
 
10
10
  class << self
11
- def cost_for(model:, input_tokens:, output_tokens:, cached_input_tokens: 0,
12
- cache_read_input_tokens: 0, cache_creation_input_tokens: 0)
13
- prices = lookup(model)
11
+ def cost_for(provider:, model:, input_tokens:, output_tokens:, cache_read_input_tokens: 0,
12
+ cache_write_input_tokens: 0, pricing_mode: nil)
13
+ prices = lookup(provider: provider, model: model)
14
14
  return nil unless prices
15
15
 
16
- token_counts = normalized_token_counts(input_tokens, output_tokens, cached_input_tokens,
17
- cache_read_input_tokens, cache_creation_input_tokens)
18
- costs = calculate_costs(token_counts, prices)
16
+ usage = UsageBreakdown.build(
17
+ input_tokens: input_tokens,
18
+ output_tokens: output_tokens,
19
+ cache_read_input_tokens: cache_read_input_tokens,
20
+ cache_write_input_tokens: cache_write_input_tokens
21
+ )
22
+ costs = calculate_costs(usage, prices, pricing_mode: pricing_mode)
19
23
 
20
24
  Cost.new(
21
25
  input_cost: costs[:input].round(8),
22
- cached_input_cost: costs[:cached_input].round(8),
23
26
  cache_read_input_cost: costs[:cache_read_input].round(8),
24
- cache_creation_input_cost: costs[:cache_creation_input].round(8),
27
+ cache_write_input_cost: costs[:cache_write_input].round(8),
25
28
  output_cost: costs[:output].round(8),
26
29
  total_cost: costs.values.sum.round(8),
27
30
  currency: "USD"
28
31
  )
29
32
  end
30
33
 
31
- def lookup(model)
34
+ def lookup(provider:, model:)
32
35
  table = prices
36
+ provider_name = provider.to_s
33
37
  model_name = model.to_s
38
+ provider_model = provider_name.empty? ? model_name : "#{provider_name}/#{model_name}"
34
39
  normalized_model = normalize_model_name(model_name)
35
40
 
36
- table[model_name] || table[normalized_model] || fuzzy_match(model_name, normalized_model, table)
41
+ table[provider_model] ||
42
+ table[model_name] ||
43
+ table[normalized_model] ||
44
+ fuzzy_match(provider_model, normalized_model, table)
37
45
  end
38
46
 
39
47
  def models
@@ -64,36 +72,40 @@ module LlmCostTracker
64
72
 
65
73
  private
66
74
 
67
- def normalized_token_counts(input_tokens, output_tokens, cached_input_tokens,
68
- cache_read_input_tokens, cache_creation_input_tokens)
69
- cached_input_tokens = cached_input_tokens.to_i
70
-
75
+ def calculate_costs(usage, prices, pricing_mode:)
71
76
  {
72
- input: [input_tokens.to_i - cached_input_tokens, 0].max,
73
- cached_input: cached_input_tokens,
74
- cache_read_input: cache_read_input_tokens.to_i,
75
- cache_creation_input: cache_creation_input_tokens.to_i,
76
- output: output_tokens.to_i
77
- }
78
- end
79
-
80
- def calculate_costs(token_counts, prices)
81
- {
82
- input: token_cost(token_counts[:input], prices[:input]),
83
- cached_input: token_cost(token_counts[:cached_input], prices[:cached_input] || prices[:input]),
77
+ input: token_cost(usage.input_tokens, price_for(prices, :input, pricing_mode)),
84
78
  cache_read_input: token_cost(
85
- token_counts[:cache_read_input],
86
- prices[:cache_read_input] || prices[:cached_input] || prices[:input]
79
+ usage.cache_read_input_tokens,
80
+ price_for(prices, :cache_read_input, pricing_mode) || price_for(prices, :input, pricing_mode)
87
81
  ),
88
- cache_creation_input: token_cost(
89
- token_counts[:cache_creation_input],
90
- prices[:cache_creation_input] || prices[:input]
82
+ cache_write_input: token_cost(
83
+ usage.cache_write_input_tokens,
84
+ price_for(prices, :cache_write_input, pricing_mode) || price_for(prices, :input, pricing_mode)
91
85
  ),
92
- output: token_cost(token_counts[:output], prices[:output])
86
+ output: token_cost(usage.output_tokens, price_for(prices, :output, pricing_mode))
93
87
  }
94
88
  end
95
89
 
90
+ def price_for(prices, key, pricing_mode)
91
+ mode = normalized_pricing_mode(pricing_mode)
92
+ return prices[key] unless mode
93
+
94
+ prices[:"#{mode}_#{key}"] || prices[key]
95
+ end
96
+
97
+ def normalized_pricing_mode(value)
98
+ return nil if value.nil?
99
+
100
+ mode = value.to_s.strip
101
+ return nil if mode.empty? || mode == "standard"
102
+
103
+ mode
104
+ end
105
+
96
106
  def token_cost(tokens, per_million_price)
107
+ return 0.0 if tokens.to_i.zero?
108
+
97
109
  (tokens.to_f / 1_000_000) * per_million_price
98
110
  end
99
111
 
@@ -3,9 +3,11 @@
3
3
  module LlmCostTracker
4
4
  class Railtie < Rails::Railtie
5
5
  generators do
6
+ require_relative "generators/llm_cost_tracker/add_period_totals_generator"
6
7
  require_relative "generators/llm_cost_tracker/add_latency_ms_generator"
7
8
  require_relative "generators/llm_cost_tracker/add_provider_response_id_generator"
8
9
  require_relative "generators/llm_cost_tracker/add_streaming_generator"
10
+ require_relative "generators/llm_cost_tracker/add_usage_breakdown_generator"
9
11
  require_relative "generators/llm_cost_tracker/install_generator"
10
12
  require_relative "generators/llm_cost_tracker/prices_generator"
11
13
  require_relative "generators/llm_cost_tracker/upgrade_cost_precision_generator"
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LlmCostTracker
4
+ module Storage
5
+ class ActiveRecordRollups
6
+ PERIODS = {
7
+ monthly: "month",
8
+ daily: "day"
9
+ }.freeze
10
+
11
+ class << self
12
+ def reset!
13
+ remove_instance_variable(:@period_totals_enabled) if instance_variable_defined?(:@period_totals_enabled)
14
+ end
15
+
16
+ def increment!(event)
17
+ return unless event.cost&.total_cost
18
+ return unless period_totals_enabled?
19
+
20
+ PERIODS.each_key { |period| increment_period_total(period, event) }
21
+ end
22
+
23
+ def monthly_total(time: Time.now.utc)
24
+ period_total(:monthly, time)
25
+ end
26
+
27
+ def daily_total(time: Time.now.utc)
28
+ period_total(:daily, time)
29
+ end
30
+
31
+ private
32
+
33
+ def period_total(period, time)
34
+ if period_totals_enabled?
35
+ period_total_model
36
+ .where(period: PERIODS.fetch(period), period_start: bucket_for(period, time))
37
+ .pick(:total_cost)
38
+ .to_f
39
+ else
40
+ LlmCostTracker::LlmApiCall
41
+ .where(tracked_at: range_start_for(period, time)..time)
42
+ .sum(:total_cost)
43
+ .to_f
44
+ end
45
+ end
46
+
47
+ def increment_period_total(period, event)
48
+ model = period_total_model
49
+ model.upsert_all(
50
+ [
51
+ {
52
+ period: PERIODS.fetch(period),
53
+ period_start: bucket_for(period, event.tracked_at),
54
+ total_cost: event.cost.total_cost
55
+ }
56
+ ],
57
+ on_duplicate: total_upsert_sql(model),
58
+ record_timestamps: true,
59
+ unique_by: unique_by(model, %i[period period_start])
60
+ )
61
+ end
62
+
63
+ def period_totals_enabled?
64
+ return @period_totals_enabled unless @period_totals_enabled.nil?
65
+
66
+ @period_totals_enabled =
67
+ LlmCostTracker::LlmApiCall.connection.data_source_exists?("llm_cost_tracker_period_totals")
68
+ end
69
+
70
+ def period_total_model
71
+ require_relative "../period_total" unless defined?(LlmCostTracker::PeriodTotal)
72
+
73
+ LlmCostTracker::PeriodTotal
74
+ end
75
+
76
+ def range_start_for(period, time)
77
+ utc_time = time.to_time.utc
78
+
79
+ case period
80
+ when :monthly then utc_time.beginning_of_month
81
+ when :daily then utc_time.beginning_of_day
82
+ end
83
+ end
84
+
85
+ def bucket_for(period, time)
86
+ utc_time = time.to_time.utc
87
+
88
+ case period
89
+ when :monthly then utc_time.beginning_of_month.to_date
90
+ when :daily then utc_time.to_date
91
+ end
92
+ end
93
+
94
+ def unique_by(model, column)
95
+ return unless model.connection.supports_insert_conflict_target?
96
+
97
+ column
98
+ end
99
+
100
+ def total_upsert_sql(model)
101
+ Arel.sql(case model.connection.adapter_name
102
+ when /mysql/i
103
+ mysql_upsert_sql(model)
104
+ else
105
+ "total_cost = total_cost + excluded.total_cost, updated_at = excluded.updated_at"
106
+ end)
107
+ end
108
+
109
+ def mysql_upsert_sql(model)
110
+ connection = model.connection
111
+ if connection.respond_to?(:supports_insert_raw_alias_syntax?, true) &&
112
+ connection.send(:supports_insert_raw_alias_syntax?)
113
+ values_reference = connection.quote_table_name("#{model.table_name}_values")
114
+ "total_cost = total_cost + #{values_reference}.total_cost, updated_at = #{values_reference}.updated_at"
115
+ else
116
+ "total_cost = total_cost + VALUES(total_cost), updated_at = VALUES(updated_at)"
117
+ end
118
+ end
119
+ end
120
+ end
121
+ end
122
+ end
@@ -1,11 +1,19 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "active_record_rollups"
4
+
3
5
  module LlmCostTracker
4
6
  module Storage
5
7
  class ActiveRecordStore
6
8
  class << self
9
+ def reset!
10
+ ActiveRecordRollups.reset!
11
+ end
12
+
7
13
  def save(event)
8
14
  tags = stringify_tags(event.tags || {})
15
+ model = LlmCostTracker::LlmApiCall
16
+ columns = model.columns_hash
9
17
 
10
18
  attributes = {
11
19
  provider: event.provider,
@@ -16,24 +24,30 @@ module LlmCostTracker
16
24
  input_cost: event.cost&.input_cost,
17
25
  output_cost: event.cost&.output_cost,
18
26
  total_cost: event.cost&.total_cost,
19
- tags: tags_for_storage(tags),
27
+ tags: tags_for_storage(tags, model),
20
28
  tracked_at: event.tracked_at
21
29
  }
22
- attributes[:latency_ms] = event.latency_ms if LlmCostTracker::LlmApiCall.latency_column?
23
- attributes[:stream] = event.stream if LlmCostTracker::LlmApiCall.stream_column?
24
- attributes[:usage_source] = event.usage_source if LlmCostTracker::LlmApiCall.usage_source_column?
25
- if LlmCostTracker::LlmApiCall.provider_response_id_column?
26
- attributes[:provider_response_id] = event.provider_response_id
30
+ optional_attributes(event).each do |name, value|
31
+ attributes[name] = value if columns.key?(name.to_s)
27
32
  end
33
+ attributes[:latency_ms] = event.latency_ms if columns.key?("latency_ms")
34
+ attributes[:stream] = event.stream if columns.key?("stream")
35
+ attributes[:usage_source] = event.usage_source if columns.key?("usage_source")
36
+ attributes[:provider_response_id] = event.provider_response_id if columns.key?("provider_response_id")
28
37
 
29
- LlmCostTracker::LlmApiCall.create!(attributes)
38
+ model.transaction do
39
+ call = model.create!(attributes)
40
+ ActiveRecordRollups.increment!(event)
41
+ call
42
+ end
30
43
  end
31
44
 
32
45
  def monthly_total(time: Time.now.utc)
33
- LlmCostTracker::LlmApiCall
34
- .where(tracked_at: time.beginning_of_month..time)
35
- .sum(:total_cost)
36
- .to_f
46
+ ActiveRecordRollups.monthly_total(time: time)
47
+ end
48
+
49
+ def daily_total(time: Time.now.utc)
50
+ ActiveRecordRollups.daily_total(time: time)
37
51
  end
38
52
 
39
53
  private
@@ -42,8 +56,19 @@ module LlmCostTracker
42
56
  tags.transform_keys(&:to_s).transform_values { |value| stringify_tag_value(value) }
43
57
  end
44
58
 
45
- def tags_for_storage(tags)
46
- LlmCostTracker::LlmApiCall.tags_json_column? ? tags : tags.to_json
59
+ def tags_for_storage(tags, model)
60
+ model.tags_json_column? ? tags : tags.to_json
61
+ end
62
+
63
+ def optional_attributes(event)
64
+ {
65
+ cache_read_input_tokens: event.cache_read_input_tokens,
66
+ cache_write_input_tokens: event.cache_write_input_tokens,
67
+ hidden_output_tokens: event.hidden_output_tokens,
68
+ cache_read_input_cost: event.cost&.cache_read_input_cost,
69
+ cache_write_input_cost: event.cost&.cache_write_input_cost,
70
+ pricing_mode: event.pricing_mode
71
+ }
47
72
  end
48
73
 
49
74
  def stringify_tag_value(value)
@@ -8,11 +8,12 @@ module LlmCostTracker
8
8
  class StreamCollector
9
9
  attr_reader :provider
10
10
 
11
- def initialize(provider:, model:, latency_ms: nil, provider_response_id: nil, metadata: {})
11
+ def initialize(provider:, model:, latency_ms: nil, provider_response_id: nil, pricing_mode: nil, metadata: {})
12
12
  @provider = provider.to_s
13
13
  @model = model
14
14
  @latency_ms = latency_ms
15
15
  @provider_response_id = provider_response_id
16
+ @pricing_mode = pricing_mode
16
17
  @metadata = ValueHelpers.deep_dup(metadata || {})
17
18
  @events = []
18
19
  @explicit_usage = nil
@@ -69,11 +70,12 @@ module LlmCostTracker
69
70
 
70
71
  @finished = true
71
72
  {
72
- events: ValueHelpers.deep_dup(@events),
73
+ events: @events.dup,
73
74
  explicit_usage: ValueHelpers.deep_dup(@explicit_usage),
74
75
  model: @model,
75
76
  latency_ms: @latency_ms,
76
77
  provider_response_id: @provider_response_id,
78
+ pricing_mode: @pricing_mode,
77
79
  metadata: ValueHelpers.deep_dup(@metadata)
78
80
  }
79
81
  end
@@ -88,6 +90,7 @@ module LlmCostTracker
88
90
  stream: true,
89
91
  usage_source: parsed.usage_source,
90
92
  provider_response_id: parsed.provider_response_id || snapshot[:provider_response_id],
93
+ pricing_mode: snapshot[:pricing_mode],
91
94
  metadata: error_metadata(errored).merge(snapshot[:metadata]).merge(parsed.metadata)
92
95
  )
93
96
  end
@@ -136,7 +139,6 @@ module LlmCostTracker
136
139
  model: snapshot[:model],
137
140
  input_tokens: input,
138
141
  output_tokens: output,
139
- total_tokens: input + output,
140
142
  stream: true,
141
143
  usage_source: :manual,
142
144
  **extras
@@ -36,5 +36,24 @@ module LlmCostTracker
36
36
  def provider_response_id_column?
37
37
  columns_hash.key?("provider_response_id")
38
38
  end
39
+
40
+ def pricing_mode_column?
41
+ columns_hash.key?("pricing_mode")
42
+ end
43
+
44
+ def usage_breakdown_columns?
45
+ %w[
46
+ cache_read_input_tokens
47
+ cache_write_input_tokens
48
+ hidden_output_tokens
49
+ ].all? { |column| columns_hash.key?(column) }
50
+ end
51
+
52
+ def usage_breakdown_cost_columns?
53
+ %w[
54
+ cache_read_input_cost
55
+ cache_write_input_cost
56
+ ].all? { |column| columns_hash.key?(column) }
57
+ end
39
58
  end
40
59
  end
@@ -10,30 +10,76 @@ module LlmCostTracker
10
10
 
11
11
  class << self
12
12
  def enforce_budget!
13
+ return unless LlmCostTracker.configuration.enabled
14
+
13
15
  Budget.enforce!
14
16
  end
15
17
 
16
18
  def record(provider:, model:, input_tokens:, output_tokens:, latency_ms: nil, stream: false,
17
- usage_source: nil, provider_response_id: nil, metadata: {})
18
- usage = EventMetadata.usage_data(input_tokens, output_tokens, metadata)
19
+ usage_source: nil, provider_response_id: nil, pricing_mode: nil, metadata: {})
20
+ return unless LlmCostTracker.configuration.enabled
21
+
22
+ usage = usage_data(input_tokens, output_tokens, metadata, pricing_mode)
23
+ cost_data = cost_for_usage(provider, model, usage)
24
+
25
+ UnknownPricing.handle!(model) unless cost_data
26
+
27
+ event = build_event(
28
+ provider: provider,
29
+ model: model,
30
+ usage: usage,
31
+ cost_data: cost_data,
32
+ metadata: metadata,
33
+ latency_ms: latency_ms,
34
+ stream: stream,
35
+ usage_source: usage_source,
36
+ provider_response_id: provider_response_id
37
+ )
38
+
39
+ ActiveSupport::Notifications.instrument(EVENT_NAME, event.to_h)
40
+
41
+ stored = store(event)
42
+ Budget.check!(event) unless stored == false
43
+
44
+ event
45
+ end
46
+
47
+ private
48
+
49
+ def usage_data(input_tokens, output_tokens, metadata, pricing_mode)
50
+ metadata = metadata.merge(pricing_mode: pricing_mode) unless pricing_mode.nil?
51
+
52
+ EventMetadata.usage_data(
53
+ input_tokens,
54
+ output_tokens,
55
+ metadata
56
+ )
57
+ end
19
58
 
20
- cost_data = Pricing.cost_for(
59
+ def cost_for_usage(provider, model, usage)
60
+ Pricing.cost_for(
61
+ provider: provider,
21
62
  model: model,
22
63
  input_tokens: usage[:input_tokens],
23
64
  output_tokens: usage[:output_tokens],
24
- cached_input_tokens: usage[:cached_input_tokens],
25
65
  cache_read_input_tokens: usage[:cache_read_input_tokens],
26
- cache_creation_input_tokens: usage[:cache_creation_input_tokens]
66
+ cache_write_input_tokens: usage[:cache_write_input_tokens],
67
+ pricing_mode: usage[:pricing_mode]
27
68
  )
69
+ end
28
70
 
29
- UnknownPricing.handle!(model) unless cost_data
30
-
31
- event = Event.new(
71
+ def build_event(provider:, model:, usage:, cost_data:, metadata:, latency_ms:, stream:, usage_source:,
72
+ provider_response_id:)
73
+ Event.new(
32
74
  provider: provider,
33
75
  model: model,
34
76
  input_tokens: usage[:input_tokens],
35
77
  output_tokens: usage[:output_tokens],
36
78
  total_tokens: usage[:total_tokens],
79
+ cache_read_input_tokens: usage[:cache_read_input_tokens],
80
+ cache_write_input_tokens: usage[:cache_write_input_tokens],
81
+ hidden_output_tokens: usage[:hidden_output_tokens],
82
+ pricing_mode: usage[:pricing_mode],
37
83
  cost: cost_data,
38
84
  tags: LlmCostTracker.configuration.default_tags.merge(EventMetadata.tags(metadata)).freeze,
39
85
  latency_ms: normalized_latency_ms(latency_ms),
@@ -42,17 +88,8 @@ module LlmCostTracker
42
88
  provider_response_id: normalized_provider_response_id(provider_response_id),
43
89
  tracked_at: Time.now.utc
44
90
  )
45
-
46
- ActiveSupport::Notifications.instrument(EVENT_NAME, event.to_h)
47
-
48
- stored = store(event)
49
- Budget.check!(event) unless stored == false
50
-
51
- event
52
91
  end
53
92
 
54
- private
55
-
56
93
  def store(event)
57
94
  config = LlmCostTracker.configuration
58
95
  case config.storage_backend
@@ -69,7 +106,7 @@ module LlmCostTracker
69
106
 
70
107
  def log_event(event, config)
71
108
  message = "#{event.provider}/#{event.model} " \
72
- "tokens=#{event.input_tokens}+#{event.output_tokens} " \
109
+ "tokens=#{event.total_tokens} " \
73
110
  "cost=#{log_cost_label(event)}"
74
111
  message += " latency=#{event.latency_ms}ms" if event.latency_ms
75
112
  message += " stream=#{event.stream}" if event.stream
@@ -80,9 +117,7 @@ module LlmCostTracker
80
117
  event
81
118
  end
82
119
 
83
- def log_cost_label(event)
84
- event.cost ? "$#{format('%.6f', event.cost.total_cost)}" : "unknown"
85
- end
120
+ def log_cost_label(event) = event.cost ? "$#{format('%.6f', event.cost.total_cost)}" : "unknown"
86
121
 
87
122
  def active_record_save(event)
88
123
  require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
@@ -111,11 +146,7 @@ module LlmCostTracker
111
146
  end
112
147
  end
113
148
 
114
- def normalized_latency_ms(latency_ms)
115
- return nil if latency_ms.nil?
116
-
117
- [latency_ms.to_i, 0].max
118
- end
149
+ def normalized_latency_ms(latency_ms) = latency_ms.nil? ? nil : [latency_ms.to_i, 0].max
119
150
 
120
151
  def normalized_usage_source(value)
121
152
  return nil if value.nil?
@@ -124,12 +155,7 @@ module LlmCostTracker
124
155
  USAGE_SOURCES.include?(symbol) ? symbol.to_s : nil
125
156
  end
126
157
 
127
- def normalized_provider_response_id(value)
128
- return nil if value.nil?
129
-
130
- string = value.to_s
131
- string.empty? ? nil : string
132
- end
158
+ def normalized_provider_response_id(value) = value.nil? || value.to_s.empty? ? nil : value.to_s
133
159
  end
134
160
  end
135
161
  end