llm_cost_tracker 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/README.md +34 -14
- data/app/services/llm_cost_tracker/dashboard/data_quality.rb +101 -19
- data/app/views/llm_cost_tracker/data_quality/index.html.erb +65 -0
- data/lib/llm_cost_tracker/budget.rb +85 -21
- data/lib/llm_cost_tracker/configuration.rb +4 -0
- data/lib/llm_cost_tracker/cost.rb +1 -2
- data/lib/llm_cost_tracker/errors.rb +22 -3
- data/lib/llm_cost_tracker/event.rb +4 -0
- data/lib/llm_cost_tracker/event_metadata.rb +21 -15
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_period_totals_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_usage_breakdown_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +66 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_usage_breakdown_to_llm_api_calls.rb.erb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +15 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +3 -1
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +11 -3
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +1 -0
- data/lib/llm_cost_tracker/middleware/faraday.rb +27 -9
- data/lib/llm_cost_tracker/parsed_usage.rb +16 -7
- data/lib/llm_cost_tracker/parsers/anthropic.rb +7 -6
- data/lib/llm_cost_tracker/parsers/base.rb +2 -1
- data/lib/llm_cost_tracker/parsers/gemini.rb +5 -2
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +18 -5
- data/lib/llm_cost_tracker/period_total.rb +9 -0
- data/lib/llm_cost_tracker/price_registry.rb +14 -4
- data/lib/llm_cost_tracker/price_sync/merger.rb +1 -1
- data/lib/llm_cost_tracker/price_sync/raw_price.rb +3 -5
- data/lib/llm_cost_tracker/price_sync/sources/litellm.rb +2 -3
- data/lib/llm_cost_tracker/price_sync/sources/open_router.rb +2 -3
- data/lib/llm_cost_tracker/prices.json +30 -30
- data/lib/llm_cost_tracker/pricing.rb +44 -32
- data/lib/llm_cost_tracker/railtie.rb +2 -0
- data/lib/llm_cost_tracker/storage/active_record_rollups.rb +122 -0
- data/lib/llm_cost_tracker/storage/active_record_store.rb +38 -13
- data/lib/llm_cost_tracker/stream_collector.rb +5 -3
- data/lib/llm_cost_tracker/tags_column.rb +19 -0
- data/lib/llm_cost_tracker/tracker.rb +58 -32
- data/lib/llm_cost_tracker/unknown_pricing.rb +14 -0
- data/lib/llm_cost_tracker/usage_breakdown.rb +30 -0
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +12 -3
- metadata +10 -4
- data/llm_cost_tracker.gemspec +0 -50
|
@@ -65,9 +65,8 @@ module LlmCostTracker
|
|
|
65
65
|
provider: provider,
|
|
66
66
|
input: price_per_million(entry["input_cost_per_token"]),
|
|
67
67
|
output: price_per_million(entry["output_cost_per_token"]),
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
cache_creation_input: provider == "anthropic" ? cache_write : nil,
|
|
68
|
+
cache_read_input: cache_read,
|
|
69
|
+
cache_write_input: cache_write,
|
|
71
70
|
source: name,
|
|
72
71
|
source_version: response_version(response),
|
|
73
72
|
fetched_at: response.fetched_at
|
|
@@ -68,9 +68,8 @@ module LlmCostTracker
|
|
|
68
68
|
provider: provider,
|
|
69
69
|
input: price_per_million(pricing["prompt"]),
|
|
70
70
|
output: price_per_million(pricing["completion"]),
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
cache_creation_input: provider == "anthropic" ? cache_write : nil,
|
|
71
|
+
cache_read_input: cache_read,
|
|
72
|
+
cache_write_input: cache_write,
|
|
74
73
|
source: name,
|
|
75
74
|
source_version: response_version(response),
|
|
76
75
|
fetched_at: response.fetched_at
|
|
@@ -10,40 +10,40 @@
|
|
|
10
10
|
]
|
|
11
11
|
},
|
|
12
12
|
"models": {
|
|
13
|
-
"gpt-5.2": { "input": 1.75, "
|
|
14
|
-
"gpt-5.1": { "input": 1.25, "
|
|
15
|
-
"gpt-5": { "input": 1.25, "
|
|
16
|
-
"gpt-5-mini": { "input": 0.25, "
|
|
17
|
-
"gpt-5-nano": { "input": 0.05, "
|
|
18
|
-
"gpt-4.1": { "input": 2.0, "
|
|
19
|
-
"gpt-4.1-mini": { "input": 0.4, "
|
|
20
|
-
"gpt-4.1-nano": { "input": 0.1, "
|
|
13
|
+
"gpt-5.2": { "input": 1.75, "cache_read_input": 0.175, "output": 14.0 },
|
|
14
|
+
"gpt-5.1": { "input": 1.25, "cache_read_input": 0.125, "output": 10.0 },
|
|
15
|
+
"gpt-5": { "input": 1.25, "cache_read_input": 0.125, "output": 10.0 },
|
|
16
|
+
"gpt-5-mini": { "input": 0.25, "cache_read_input": 0.025, "output": 2.0 },
|
|
17
|
+
"gpt-5-nano": { "input": 0.05, "cache_read_input": 0.005, "output": 0.4 },
|
|
18
|
+
"gpt-4.1": { "input": 2.0, "cache_read_input": 0.5, "output": 8.0 },
|
|
19
|
+
"gpt-4.1-mini": { "input": 0.4, "cache_read_input": 0.1, "output": 1.6 },
|
|
20
|
+
"gpt-4.1-nano": { "input": 0.1, "cache_read_input": 0.025, "output": 0.4 },
|
|
21
21
|
"gpt-4o-2024-05-13": { "input": 5.0, "output": 15.0 },
|
|
22
|
-
"gpt-4o": { "input": 2.5, "
|
|
23
|
-
"gpt-4o-mini": { "input": 0.15, "
|
|
22
|
+
"gpt-4o": { "input": 2.5, "cache_read_input": 1.25, "output": 10.0 },
|
|
23
|
+
"gpt-4o-mini": { "input": 0.15, "cache_read_input": 0.075, "output": 0.6 },
|
|
24
24
|
"gpt-4-turbo": { "input": 10.0, "output": 30.0 },
|
|
25
25
|
"gpt-4": { "input": 30.0, "output": 60.0 },
|
|
26
26
|
"gpt-3.5-turbo": { "input": 0.5, "output": 1.5 },
|
|
27
|
-
"o1": { "input": 15.0, "
|
|
28
|
-
"o1-mini": { "input": 1.1, "
|
|
29
|
-
"o3": { "input": 2.0, "
|
|
30
|
-
"o3-mini": { "input": 1.1, "
|
|
31
|
-
"o4-mini": { "input": 1.1, "
|
|
32
|
-
"claude-sonnet-4-6": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "
|
|
33
|
-
"claude-opus-4-6": { "input": 5.0, "output": 25.0, "cache_read_input": 0.5, "
|
|
34
|
-
"claude-opus-4-1": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "
|
|
35
|
-
"claude-opus-4": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "
|
|
36
|
-
"claude-sonnet-4-5": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "
|
|
37
|
-
"claude-sonnet-4": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "
|
|
38
|
-
"claude-haiku-4-5": { "input": 1.0, "output": 5.0, "cache_read_input": 0.1, "
|
|
39
|
-
"claude-3-7-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "
|
|
40
|
-
"claude-3-5-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "
|
|
41
|
-
"claude-3-5-haiku": { "input": 0.8, "output": 4.0, "cache_read_input": 0.08, "
|
|
42
|
-
"claude-3-opus": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "
|
|
43
|
-
"gemini-2.5-pro": { "input": 1.25, "
|
|
44
|
-
"gemini-2.5-flash": { "input": 0.3, "
|
|
45
|
-
"gemini-2.5-flash-lite": { "input": 0.1, "
|
|
46
|
-
"gemini-2.0-flash": { "input": 0.1, "
|
|
27
|
+
"o1": { "input": 15.0, "cache_read_input": 7.5, "output": 60.0 },
|
|
28
|
+
"o1-mini": { "input": 1.1, "cache_read_input": 0.55, "output": 4.4 },
|
|
29
|
+
"o3": { "input": 2.0, "cache_read_input": 0.5, "output": 8.0 },
|
|
30
|
+
"o3-mini": { "input": 1.1, "cache_read_input": 0.55, "output": 4.4 },
|
|
31
|
+
"o4-mini": { "input": 1.1, "cache_read_input": 0.275, "output": 4.4 },
|
|
32
|
+
"claude-sonnet-4-6": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_write_input": 3.75 },
|
|
33
|
+
"claude-opus-4-6": { "input": 5.0, "output": 25.0, "cache_read_input": 0.5, "cache_write_input": 6.25 },
|
|
34
|
+
"claude-opus-4-1": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_write_input": 18.75 },
|
|
35
|
+
"claude-opus-4": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_write_input": 18.75 },
|
|
36
|
+
"claude-sonnet-4-5": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_write_input": 3.75 },
|
|
37
|
+
"claude-sonnet-4": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_write_input": 3.75 },
|
|
38
|
+
"claude-haiku-4-5": { "input": 1.0, "output": 5.0, "cache_read_input": 0.1, "cache_write_input": 1.25 },
|
|
39
|
+
"claude-3-7-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_write_input": 3.75 },
|
|
40
|
+
"claude-3-5-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_write_input": 3.75 },
|
|
41
|
+
"claude-3-5-haiku": { "input": 0.8, "output": 4.0, "cache_read_input": 0.08, "cache_write_input": 1.0 },
|
|
42
|
+
"claude-3-opus": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_write_input": 18.75 },
|
|
43
|
+
"gemini-2.5-pro": { "input": 1.25, "cache_read_input": 0.125, "output": 10.0 },
|
|
44
|
+
"gemini-2.5-flash": { "input": 0.3, "cache_read_input": 0.03, "output": 2.5 },
|
|
45
|
+
"gemini-2.5-flash-lite": { "input": 0.1, "cache_read_input": 0.01, "output": 0.4 },
|
|
46
|
+
"gemini-2.0-flash": { "input": 0.1, "cache_read_input": 0.025, "output": 0.4 },
|
|
47
47
|
"gemini-2.0-flash-lite": { "input": 0.075, "output": 0.3 },
|
|
48
48
|
"gemini-1.5-pro": { "input": 1.25, "output": 5.0 },
|
|
49
49
|
"gemini-1.5-flash": { "input": 0.075, "output": 0.3 }
|
|
@@ -8,32 +8,40 @@ module LlmCostTracker
|
|
|
8
8
|
MUTEX = Monitor.new
|
|
9
9
|
|
|
10
10
|
class << self
|
|
11
|
-
def cost_for(model:, input_tokens:, output_tokens:,
|
|
12
|
-
|
|
13
|
-
prices = lookup(model)
|
|
11
|
+
def cost_for(provider:, model:, input_tokens:, output_tokens:, cache_read_input_tokens: 0,
|
|
12
|
+
cache_write_input_tokens: 0, pricing_mode: nil)
|
|
13
|
+
prices = lookup(provider: provider, model: model)
|
|
14
14
|
return nil unless prices
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
16
|
+
usage = UsageBreakdown.build(
|
|
17
|
+
input_tokens: input_tokens,
|
|
18
|
+
output_tokens: output_tokens,
|
|
19
|
+
cache_read_input_tokens: cache_read_input_tokens,
|
|
20
|
+
cache_write_input_tokens: cache_write_input_tokens
|
|
21
|
+
)
|
|
22
|
+
costs = calculate_costs(usage, prices, pricing_mode: pricing_mode)
|
|
19
23
|
|
|
20
24
|
Cost.new(
|
|
21
25
|
input_cost: costs[:input].round(8),
|
|
22
|
-
cached_input_cost: costs[:cached_input].round(8),
|
|
23
26
|
cache_read_input_cost: costs[:cache_read_input].round(8),
|
|
24
|
-
|
|
27
|
+
cache_write_input_cost: costs[:cache_write_input].round(8),
|
|
25
28
|
output_cost: costs[:output].round(8),
|
|
26
29
|
total_cost: costs.values.sum.round(8),
|
|
27
30
|
currency: "USD"
|
|
28
31
|
)
|
|
29
32
|
end
|
|
30
33
|
|
|
31
|
-
def lookup(model)
|
|
34
|
+
def lookup(provider:, model:)
|
|
32
35
|
table = prices
|
|
36
|
+
provider_name = provider.to_s
|
|
33
37
|
model_name = model.to_s
|
|
38
|
+
provider_model = provider_name.empty? ? model_name : "#{provider_name}/#{model_name}"
|
|
34
39
|
normalized_model = normalize_model_name(model_name)
|
|
35
40
|
|
|
36
|
-
table[
|
|
41
|
+
table[provider_model] ||
|
|
42
|
+
table[model_name] ||
|
|
43
|
+
table[normalized_model] ||
|
|
44
|
+
fuzzy_match(provider_model, normalized_model, table)
|
|
37
45
|
end
|
|
38
46
|
|
|
39
47
|
def models
|
|
@@ -64,36 +72,40 @@ module LlmCostTracker
|
|
|
64
72
|
|
|
65
73
|
private
|
|
66
74
|
|
|
67
|
-
def
|
|
68
|
-
cache_read_input_tokens, cache_creation_input_tokens)
|
|
69
|
-
cached_input_tokens = cached_input_tokens.to_i
|
|
70
|
-
|
|
75
|
+
def calculate_costs(usage, prices, pricing_mode:)
|
|
71
76
|
{
|
|
72
|
-
input:
|
|
73
|
-
cached_input: cached_input_tokens,
|
|
74
|
-
cache_read_input: cache_read_input_tokens.to_i,
|
|
75
|
-
cache_creation_input: cache_creation_input_tokens.to_i,
|
|
76
|
-
output: output_tokens.to_i
|
|
77
|
-
}
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
def calculate_costs(token_counts, prices)
|
|
81
|
-
{
|
|
82
|
-
input: token_cost(token_counts[:input], prices[:input]),
|
|
83
|
-
cached_input: token_cost(token_counts[:cached_input], prices[:cached_input] || prices[:input]),
|
|
77
|
+
input: token_cost(usage.input_tokens, price_for(prices, :input, pricing_mode)),
|
|
84
78
|
cache_read_input: token_cost(
|
|
85
|
-
|
|
86
|
-
prices
|
|
79
|
+
usage.cache_read_input_tokens,
|
|
80
|
+
price_for(prices, :cache_read_input, pricing_mode) || price_for(prices, :input, pricing_mode)
|
|
87
81
|
),
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
prices
|
|
82
|
+
cache_write_input: token_cost(
|
|
83
|
+
usage.cache_write_input_tokens,
|
|
84
|
+
price_for(prices, :cache_write_input, pricing_mode) || price_for(prices, :input, pricing_mode)
|
|
91
85
|
),
|
|
92
|
-
output: token_cost(
|
|
86
|
+
output: token_cost(usage.output_tokens, price_for(prices, :output, pricing_mode))
|
|
93
87
|
}
|
|
94
88
|
end
|
|
95
89
|
|
|
90
|
+
def price_for(prices, key, pricing_mode)
|
|
91
|
+
mode = normalized_pricing_mode(pricing_mode)
|
|
92
|
+
return prices[key] unless mode
|
|
93
|
+
|
|
94
|
+
prices[:"#{mode}_#{key}"] || prices[key]
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
def normalized_pricing_mode(value)
|
|
98
|
+
return nil if value.nil?
|
|
99
|
+
|
|
100
|
+
mode = value.to_s.strip
|
|
101
|
+
return nil if mode.empty? || mode == "standard"
|
|
102
|
+
|
|
103
|
+
mode
|
|
104
|
+
end
|
|
105
|
+
|
|
96
106
|
def token_cost(tokens, per_million_price)
|
|
107
|
+
return 0.0 if tokens.to_i.zero?
|
|
108
|
+
|
|
97
109
|
(tokens.to_f / 1_000_000) * per_million_price
|
|
98
110
|
end
|
|
99
111
|
|
|
@@ -3,9 +3,11 @@
|
|
|
3
3
|
module LlmCostTracker
|
|
4
4
|
class Railtie < Rails::Railtie
|
|
5
5
|
generators do
|
|
6
|
+
require_relative "generators/llm_cost_tracker/add_period_totals_generator"
|
|
6
7
|
require_relative "generators/llm_cost_tracker/add_latency_ms_generator"
|
|
7
8
|
require_relative "generators/llm_cost_tracker/add_provider_response_id_generator"
|
|
8
9
|
require_relative "generators/llm_cost_tracker/add_streaming_generator"
|
|
10
|
+
require_relative "generators/llm_cost_tracker/add_usage_breakdown_generator"
|
|
9
11
|
require_relative "generators/llm_cost_tracker/install_generator"
|
|
10
12
|
require_relative "generators/llm_cost_tracker/prices_generator"
|
|
11
13
|
require_relative "generators/llm_cost_tracker/upgrade_cost_precision_generator"
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
|
|
4
|
+
module Storage
|
|
5
|
+
class ActiveRecordRollups
|
|
6
|
+
PERIODS = {
|
|
7
|
+
monthly: "month",
|
|
8
|
+
daily: "day"
|
|
9
|
+
}.freeze
|
|
10
|
+
|
|
11
|
+
class << self
|
|
12
|
+
def reset!
|
|
13
|
+
remove_instance_variable(:@period_totals_enabled) if instance_variable_defined?(:@period_totals_enabled)
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def increment!(event)
|
|
17
|
+
return unless event.cost&.total_cost
|
|
18
|
+
return unless period_totals_enabled?
|
|
19
|
+
|
|
20
|
+
PERIODS.each_key { |period| increment_period_total(period, event) }
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def monthly_total(time: Time.now.utc)
|
|
24
|
+
period_total(:monthly, time)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def daily_total(time: Time.now.utc)
|
|
28
|
+
period_total(:daily, time)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
private
|
|
32
|
+
|
|
33
|
+
def period_total(period, time)
|
|
34
|
+
if period_totals_enabled?
|
|
35
|
+
period_total_model
|
|
36
|
+
.where(period: PERIODS.fetch(period), period_start: bucket_for(period, time))
|
|
37
|
+
.pick(:total_cost)
|
|
38
|
+
.to_f
|
|
39
|
+
else
|
|
40
|
+
LlmCostTracker::LlmApiCall
|
|
41
|
+
.where(tracked_at: range_start_for(period, time)..time)
|
|
42
|
+
.sum(:total_cost)
|
|
43
|
+
.to_f
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def increment_period_total(period, event)
|
|
48
|
+
model = period_total_model
|
|
49
|
+
model.upsert_all(
|
|
50
|
+
[
|
|
51
|
+
{
|
|
52
|
+
period: PERIODS.fetch(period),
|
|
53
|
+
period_start: bucket_for(period, event.tracked_at),
|
|
54
|
+
total_cost: event.cost.total_cost
|
|
55
|
+
}
|
|
56
|
+
],
|
|
57
|
+
on_duplicate: total_upsert_sql(model),
|
|
58
|
+
record_timestamps: true,
|
|
59
|
+
unique_by: unique_by(model, %i[period period_start])
|
|
60
|
+
)
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def period_totals_enabled?
|
|
64
|
+
return @period_totals_enabled unless @period_totals_enabled.nil?
|
|
65
|
+
|
|
66
|
+
@period_totals_enabled =
|
|
67
|
+
LlmCostTracker::LlmApiCall.connection.data_source_exists?("llm_cost_tracker_period_totals")
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def period_total_model
|
|
71
|
+
require_relative "../period_total" unless defined?(LlmCostTracker::PeriodTotal)
|
|
72
|
+
|
|
73
|
+
LlmCostTracker::PeriodTotal
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def range_start_for(period, time)
|
|
77
|
+
utc_time = time.to_time.utc
|
|
78
|
+
|
|
79
|
+
case period
|
|
80
|
+
when :monthly then utc_time.beginning_of_month
|
|
81
|
+
when :daily then utc_time.beginning_of_day
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def bucket_for(period, time)
|
|
86
|
+
utc_time = time.to_time.utc
|
|
87
|
+
|
|
88
|
+
case period
|
|
89
|
+
when :monthly then utc_time.beginning_of_month.to_date
|
|
90
|
+
when :daily then utc_time.to_date
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
def unique_by(model, column)
|
|
95
|
+
return unless model.connection.supports_insert_conflict_target?
|
|
96
|
+
|
|
97
|
+
column
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def total_upsert_sql(model)
|
|
101
|
+
Arel.sql(case model.connection.adapter_name
|
|
102
|
+
when /mysql/i
|
|
103
|
+
mysql_upsert_sql(model)
|
|
104
|
+
else
|
|
105
|
+
"total_cost = total_cost + excluded.total_cost, updated_at = excluded.updated_at"
|
|
106
|
+
end)
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
def mysql_upsert_sql(model)
|
|
110
|
+
connection = model.connection
|
|
111
|
+
if connection.respond_to?(:supports_insert_raw_alias_syntax?, true) &&
|
|
112
|
+
connection.send(:supports_insert_raw_alias_syntax?)
|
|
113
|
+
values_reference = connection.quote_table_name("#{model.table_name}_values")
|
|
114
|
+
"total_cost = total_cost + #{values_reference}.total_cost, updated_at = #{values_reference}.updated_at"
|
|
115
|
+
else
|
|
116
|
+
"total_cost = total_cost + VALUES(total_cost), updated_at = VALUES(updated_at)"
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
end
|
|
@@ -1,11 +1,19 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "active_record_rollups"
|
|
4
|
+
|
|
3
5
|
module LlmCostTracker
|
|
4
6
|
module Storage
|
|
5
7
|
class ActiveRecordStore
|
|
6
8
|
class << self
|
|
9
|
+
def reset!
|
|
10
|
+
ActiveRecordRollups.reset!
|
|
11
|
+
end
|
|
12
|
+
|
|
7
13
|
def save(event)
|
|
8
14
|
tags = stringify_tags(event.tags || {})
|
|
15
|
+
model = LlmCostTracker::LlmApiCall
|
|
16
|
+
columns = model.columns_hash
|
|
9
17
|
|
|
10
18
|
attributes = {
|
|
11
19
|
provider: event.provider,
|
|
@@ -16,24 +24,30 @@ module LlmCostTracker
|
|
|
16
24
|
input_cost: event.cost&.input_cost,
|
|
17
25
|
output_cost: event.cost&.output_cost,
|
|
18
26
|
total_cost: event.cost&.total_cost,
|
|
19
|
-
tags: tags_for_storage(tags),
|
|
27
|
+
tags: tags_for_storage(tags, model),
|
|
20
28
|
tracked_at: event.tracked_at
|
|
21
29
|
}
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
attributes[:usage_source] = event.usage_source if LlmCostTracker::LlmApiCall.usage_source_column?
|
|
25
|
-
if LlmCostTracker::LlmApiCall.provider_response_id_column?
|
|
26
|
-
attributes[:provider_response_id] = event.provider_response_id
|
|
30
|
+
optional_attributes(event).each do |name, value|
|
|
31
|
+
attributes[name] = value if columns.key?(name.to_s)
|
|
27
32
|
end
|
|
33
|
+
attributes[:latency_ms] = event.latency_ms if columns.key?("latency_ms")
|
|
34
|
+
attributes[:stream] = event.stream if columns.key?("stream")
|
|
35
|
+
attributes[:usage_source] = event.usage_source if columns.key?("usage_source")
|
|
36
|
+
attributes[:provider_response_id] = event.provider_response_id if columns.key?("provider_response_id")
|
|
28
37
|
|
|
29
|
-
|
|
38
|
+
model.transaction do
|
|
39
|
+
call = model.create!(attributes)
|
|
40
|
+
ActiveRecordRollups.increment!(event)
|
|
41
|
+
call
|
|
42
|
+
end
|
|
30
43
|
end
|
|
31
44
|
|
|
32
45
|
def monthly_total(time: Time.now.utc)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
46
|
+
ActiveRecordRollups.monthly_total(time: time)
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def daily_total(time: Time.now.utc)
|
|
50
|
+
ActiveRecordRollups.daily_total(time: time)
|
|
37
51
|
end
|
|
38
52
|
|
|
39
53
|
private
|
|
@@ -42,8 +56,19 @@ module LlmCostTracker
|
|
|
42
56
|
tags.transform_keys(&:to_s).transform_values { |value| stringify_tag_value(value) }
|
|
43
57
|
end
|
|
44
58
|
|
|
45
|
-
def tags_for_storage(tags)
|
|
46
|
-
|
|
59
|
+
def tags_for_storage(tags, model)
|
|
60
|
+
model.tags_json_column? ? tags : tags.to_json
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def optional_attributes(event)
|
|
64
|
+
{
|
|
65
|
+
cache_read_input_tokens: event.cache_read_input_tokens,
|
|
66
|
+
cache_write_input_tokens: event.cache_write_input_tokens,
|
|
67
|
+
hidden_output_tokens: event.hidden_output_tokens,
|
|
68
|
+
cache_read_input_cost: event.cost&.cache_read_input_cost,
|
|
69
|
+
cache_write_input_cost: event.cost&.cache_write_input_cost,
|
|
70
|
+
pricing_mode: event.pricing_mode
|
|
71
|
+
}
|
|
47
72
|
end
|
|
48
73
|
|
|
49
74
|
def stringify_tag_value(value)
|
|
@@ -8,11 +8,12 @@ module LlmCostTracker
|
|
|
8
8
|
class StreamCollector
|
|
9
9
|
attr_reader :provider
|
|
10
10
|
|
|
11
|
-
def initialize(provider:, model:, latency_ms: nil, provider_response_id: nil, metadata: {})
|
|
11
|
+
def initialize(provider:, model:, latency_ms: nil, provider_response_id: nil, pricing_mode: nil, metadata: {})
|
|
12
12
|
@provider = provider.to_s
|
|
13
13
|
@model = model
|
|
14
14
|
@latency_ms = latency_ms
|
|
15
15
|
@provider_response_id = provider_response_id
|
|
16
|
+
@pricing_mode = pricing_mode
|
|
16
17
|
@metadata = ValueHelpers.deep_dup(metadata || {})
|
|
17
18
|
@events = []
|
|
18
19
|
@explicit_usage = nil
|
|
@@ -69,11 +70,12 @@ module LlmCostTracker
|
|
|
69
70
|
|
|
70
71
|
@finished = true
|
|
71
72
|
{
|
|
72
|
-
events:
|
|
73
|
+
events: @events.dup,
|
|
73
74
|
explicit_usage: ValueHelpers.deep_dup(@explicit_usage),
|
|
74
75
|
model: @model,
|
|
75
76
|
latency_ms: @latency_ms,
|
|
76
77
|
provider_response_id: @provider_response_id,
|
|
78
|
+
pricing_mode: @pricing_mode,
|
|
77
79
|
metadata: ValueHelpers.deep_dup(@metadata)
|
|
78
80
|
}
|
|
79
81
|
end
|
|
@@ -88,6 +90,7 @@ module LlmCostTracker
|
|
|
88
90
|
stream: true,
|
|
89
91
|
usage_source: parsed.usage_source,
|
|
90
92
|
provider_response_id: parsed.provider_response_id || snapshot[:provider_response_id],
|
|
93
|
+
pricing_mode: snapshot[:pricing_mode],
|
|
91
94
|
metadata: error_metadata(errored).merge(snapshot[:metadata]).merge(parsed.metadata)
|
|
92
95
|
)
|
|
93
96
|
end
|
|
@@ -136,7 +139,6 @@ module LlmCostTracker
|
|
|
136
139
|
model: snapshot[:model],
|
|
137
140
|
input_tokens: input,
|
|
138
141
|
output_tokens: output,
|
|
139
|
-
total_tokens: input + output,
|
|
140
142
|
stream: true,
|
|
141
143
|
usage_source: :manual,
|
|
142
144
|
**extras
|
|
@@ -36,5 +36,24 @@ module LlmCostTracker
|
|
|
36
36
|
def provider_response_id_column?
|
|
37
37
|
columns_hash.key?("provider_response_id")
|
|
38
38
|
end
|
|
39
|
+
|
|
40
|
+
def pricing_mode_column?
|
|
41
|
+
columns_hash.key?("pricing_mode")
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def usage_breakdown_columns?
|
|
45
|
+
%w[
|
|
46
|
+
cache_read_input_tokens
|
|
47
|
+
cache_write_input_tokens
|
|
48
|
+
hidden_output_tokens
|
|
49
|
+
].all? { |column| columns_hash.key?(column) }
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def usage_breakdown_cost_columns?
|
|
53
|
+
%w[
|
|
54
|
+
cache_read_input_cost
|
|
55
|
+
cache_write_input_cost
|
|
56
|
+
].all? { |column| columns_hash.key?(column) }
|
|
57
|
+
end
|
|
39
58
|
end
|
|
40
59
|
end
|
|
@@ -10,30 +10,76 @@ module LlmCostTracker
|
|
|
10
10
|
|
|
11
11
|
class << self
|
|
12
12
|
def enforce_budget!
|
|
13
|
+
return unless LlmCostTracker.configuration.enabled
|
|
14
|
+
|
|
13
15
|
Budget.enforce!
|
|
14
16
|
end
|
|
15
17
|
|
|
16
18
|
def record(provider:, model:, input_tokens:, output_tokens:, latency_ms: nil, stream: false,
|
|
17
|
-
usage_source: nil, provider_response_id: nil, metadata: {})
|
|
18
|
-
|
|
19
|
+
usage_source: nil, provider_response_id: nil, pricing_mode: nil, metadata: {})
|
|
20
|
+
return unless LlmCostTracker.configuration.enabled
|
|
21
|
+
|
|
22
|
+
usage = usage_data(input_tokens, output_tokens, metadata, pricing_mode)
|
|
23
|
+
cost_data = cost_for_usage(provider, model, usage)
|
|
24
|
+
|
|
25
|
+
UnknownPricing.handle!(model) unless cost_data
|
|
26
|
+
|
|
27
|
+
event = build_event(
|
|
28
|
+
provider: provider,
|
|
29
|
+
model: model,
|
|
30
|
+
usage: usage,
|
|
31
|
+
cost_data: cost_data,
|
|
32
|
+
metadata: metadata,
|
|
33
|
+
latency_ms: latency_ms,
|
|
34
|
+
stream: stream,
|
|
35
|
+
usage_source: usage_source,
|
|
36
|
+
provider_response_id: provider_response_id
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
ActiveSupport::Notifications.instrument(EVENT_NAME, event.to_h)
|
|
40
|
+
|
|
41
|
+
stored = store(event)
|
|
42
|
+
Budget.check!(event) unless stored == false
|
|
43
|
+
|
|
44
|
+
event
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
private
|
|
48
|
+
|
|
49
|
+
def usage_data(input_tokens, output_tokens, metadata, pricing_mode)
|
|
50
|
+
metadata = metadata.merge(pricing_mode: pricing_mode) unless pricing_mode.nil?
|
|
51
|
+
|
|
52
|
+
EventMetadata.usage_data(
|
|
53
|
+
input_tokens,
|
|
54
|
+
output_tokens,
|
|
55
|
+
metadata
|
|
56
|
+
)
|
|
57
|
+
end
|
|
19
58
|
|
|
20
|
-
|
|
59
|
+
def cost_for_usage(provider, model, usage)
|
|
60
|
+
Pricing.cost_for(
|
|
61
|
+
provider: provider,
|
|
21
62
|
model: model,
|
|
22
63
|
input_tokens: usage[:input_tokens],
|
|
23
64
|
output_tokens: usage[:output_tokens],
|
|
24
|
-
cached_input_tokens: usage[:cached_input_tokens],
|
|
25
65
|
cache_read_input_tokens: usage[:cache_read_input_tokens],
|
|
26
|
-
|
|
66
|
+
cache_write_input_tokens: usage[:cache_write_input_tokens],
|
|
67
|
+
pricing_mode: usage[:pricing_mode]
|
|
27
68
|
)
|
|
69
|
+
end
|
|
28
70
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
71
|
+
def build_event(provider:, model:, usage:, cost_data:, metadata:, latency_ms:, stream:, usage_source:,
|
|
72
|
+
provider_response_id:)
|
|
73
|
+
Event.new(
|
|
32
74
|
provider: provider,
|
|
33
75
|
model: model,
|
|
34
76
|
input_tokens: usage[:input_tokens],
|
|
35
77
|
output_tokens: usage[:output_tokens],
|
|
36
78
|
total_tokens: usage[:total_tokens],
|
|
79
|
+
cache_read_input_tokens: usage[:cache_read_input_tokens],
|
|
80
|
+
cache_write_input_tokens: usage[:cache_write_input_tokens],
|
|
81
|
+
hidden_output_tokens: usage[:hidden_output_tokens],
|
|
82
|
+
pricing_mode: usage[:pricing_mode],
|
|
37
83
|
cost: cost_data,
|
|
38
84
|
tags: LlmCostTracker.configuration.default_tags.merge(EventMetadata.tags(metadata)).freeze,
|
|
39
85
|
latency_ms: normalized_latency_ms(latency_ms),
|
|
@@ -42,17 +88,8 @@ module LlmCostTracker
|
|
|
42
88
|
provider_response_id: normalized_provider_response_id(provider_response_id),
|
|
43
89
|
tracked_at: Time.now.utc
|
|
44
90
|
)
|
|
45
|
-
|
|
46
|
-
ActiveSupport::Notifications.instrument(EVENT_NAME, event.to_h)
|
|
47
|
-
|
|
48
|
-
stored = store(event)
|
|
49
|
-
Budget.check!(event) unless stored == false
|
|
50
|
-
|
|
51
|
-
event
|
|
52
91
|
end
|
|
53
92
|
|
|
54
|
-
private
|
|
55
|
-
|
|
56
93
|
def store(event)
|
|
57
94
|
config = LlmCostTracker.configuration
|
|
58
95
|
case config.storage_backend
|
|
@@ -69,7 +106,7 @@ module LlmCostTracker
|
|
|
69
106
|
|
|
70
107
|
def log_event(event, config)
|
|
71
108
|
message = "#{event.provider}/#{event.model} " \
|
|
72
|
-
"tokens=#{event.
|
|
109
|
+
"tokens=#{event.total_tokens} " \
|
|
73
110
|
"cost=#{log_cost_label(event)}"
|
|
74
111
|
message += " latency=#{event.latency_ms}ms" if event.latency_ms
|
|
75
112
|
message += " stream=#{event.stream}" if event.stream
|
|
@@ -80,9 +117,7 @@ module LlmCostTracker
|
|
|
80
117
|
event
|
|
81
118
|
end
|
|
82
119
|
|
|
83
|
-
def log_cost_label(event)
|
|
84
|
-
event.cost ? "$#{format('%.6f', event.cost.total_cost)}" : "unknown"
|
|
85
|
-
end
|
|
120
|
+
def log_cost_label(event) = event.cost ? "$#{format('%.6f', event.cost.total_cost)}" : "unknown"
|
|
86
121
|
|
|
87
122
|
def active_record_save(event)
|
|
88
123
|
require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
|
|
@@ -111,11 +146,7 @@ module LlmCostTracker
|
|
|
111
146
|
end
|
|
112
147
|
end
|
|
113
148
|
|
|
114
|
-
def normalized_latency_ms(latency_ms)
|
|
115
|
-
return nil if latency_ms.nil?
|
|
116
|
-
|
|
117
|
-
[latency_ms.to_i, 0].max
|
|
118
|
-
end
|
|
149
|
+
def normalized_latency_ms(latency_ms) = latency_ms.nil? ? nil : [latency_ms.to_i, 0].max
|
|
119
150
|
|
|
120
151
|
def normalized_usage_source(value)
|
|
121
152
|
return nil if value.nil?
|
|
@@ -124,12 +155,7 @@ module LlmCostTracker
|
|
|
124
155
|
USAGE_SOURCES.include?(symbol) ? symbol.to_s : nil
|
|
125
156
|
end
|
|
126
157
|
|
|
127
|
-
def normalized_provider_response_id(value)
|
|
128
|
-
return nil if value.nil?
|
|
129
|
-
|
|
130
|
-
string = value.to_s
|
|
131
|
-
string.empty? ? nil : string
|
|
132
|
-
end
|
|
158
|
+
def normalized_provider_response_id(value) = value.nil? || value.to_s.empty? ? nil : value.to_s
|
|
133
159
|
end
|
|
134
160
|
end
|
|
135
161
|
end
|