llm_cost_tracker 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -0
- data/README.md +202 -11
- data/lib/llm_cost_tracker/budget.rb +97 -0
- data/lib/llm_cost_tracker/configuration.rb +37 -0
- data/lib/llm_cost_tracker/errors.rb +37 -0
- data/lib/llm_cost_tracker/event_metadata.rb +54 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +9 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +16 -4
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +14 -1
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +15 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +41 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +29 -0
- data/lib/llm_cost_tracker/llm_api_call.rb +68 -2
- data/lib/llm_cost_tracker/middleware/faraday.rb +50 -12
- data/lib/llm_cost_tracker/parsers/anthropic.rb +4 -1
- data/lib/llm_cost_tracker/parsers/gemini.rb +9 -2
- data/lib/llm_cost_tracker/parsers/openai.rb +10 -3
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +44 -0
- data/lib/llm_cost_tracker/parsers/registry.rb +16 -7
- data/lib/llm_cost_tracker/price_registry.rb +69 -0
- data/lib/llm_cost_tracker/prices.json +51 -0
- data/lib/llm_cost_tracker/pricing.rb +74 -74
- data/lib/llm_cost_tracker/railtie.rb +3 -0
- data/lib/llm_cost_tracker/storage/active_record_store.rb +12 -3
- data/lib/llm_cost_tracker/tracker.rb +49 -54
- data/lib/llm_cost_tracker/unknown_pricing.rb +47 -0
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +33 -5
- data/llm_cost_tracker.gemspec +4 -3
- metadata +20 -6
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
{
|
|
2
|
+
"metadata": {
|
|
3
|
+
"updated_at": "2026-04-18",
|
|
4
|
+
"currency": "USD",
|
|
5
|
+
"unit": "1M tokens",
|
|
6
|
+
"source_urls": [
|
|
7
|
+
"https://openai.com/api/pricing",
|
|
8
|
+
"https://www.anthropic.com/pricing",
|
|
9
|
+
"https://ai.google.dev/gemini-api/docs/pricing"
|
|
10
|
+
]
|
|
11
|
+
},
|
|
12
|
+
"models": {
|
|
13
|
+
"gpt-5.2": { "input": 1.75, "cached_input": 0.175, "output": 14.0 },
|
|
14
|
+
"gpt-5.1": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
|
|
15
|
+
"gpt-5": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
|
|
16
|
+
"gpt-5-mini": { "input": 0.25, "cached_input": 0.025, "output": 2.0 },
|
|
17
|
+
"gpt-5-nano": { "input": 0.05, "cached_input": 0.005, "output": 0.4 },
|
|
18
|
+
"gpt-4.1": { "input": 2.0, "cached_input": 0.5, "output": 8.0 },
|
|
19
|
+
"gpt-4.1-mini": { "input": 0.4, "cached_input": 0.1, "output": 1.6 },
|
|
20
|
+
"gpt-4.1-nano": { "input": 0.1, "cached_input": 0.025, "output": 0.4 },
|
|
21
|
+
"gpt-4o-2024-05-13": { "input": 5.0, "output": 15.0 },
|
|
22
|
+
"gpt-4o": { "input": 2.5, "cached_input": 1.25, "output": 10.0 },
|
|
23
|
+
"gpt-4o-mini": { "input": 0.15, "cached_input": 0.075, "output": 0.6 },
|
|
24
|
+
"gpt-4-turbo": { "input": 10.0, "output": 30.0 },
|
|
25
|
+
"gpt-4": { "input": 30.0, "output": 60.0 },
|
|
26
|
+
"gpt-3.5-turbo": { "input": 0.5, "output": 1.5 },
|
|
27
|
+
"o1": { "input": 15.0, "cached_input": 7.5, "output": 60.0 },
|
|
28
|
+
"o1-mini": { "input": 1.1, "cached_input": 0.55, "output": 4.4 },
|
|
29
|
+
"o3": { "input": 2.0, "cached_input": 0.5, "output": 8.0 },
|
|
30
|
+
"o3-mini": { "input": 1.1, "cached_input": 0.55, "output": 4.4 },
|
|
31
|
+
"o4-mini": { "input": 1.1, "cached_input": 0.275, "output": 4.4 },
|
|
32
|
+
"claude-sonnet-4-6": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
|
|
33
|
+
"claude-opus-4-6": { "input": 5.0, "output": 25.0, "cache_read_input": 0.5, "cache_creation_input": 6.25 },
|
|
34
|
+
"claude-opus-4-1": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
|
|
35
|
+
"claude-opus-4": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
|
|
36
|
+
"claude-sonnet-4-5": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
|
|
37
|
+
"claude-sonnet-4": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
|
|
38
|
+
"claude-haiku-4-5": { "input": 1.0, "output": 5.0, "cache_read_input": 0.1, "cache_creation_input": 1.25 },
|
|
39
|
+
"claude-3-7-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
|
|
40
|
+
"claude-3-5-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
|
|
41
|
+
"claude-3-5-haiku": { "input": 0.8, "output": 4.0, "cache_read_input": 0.08, "cache_creation_input": 1.0 },
|
|
42
|
+
"claude-3-opus": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
|
|
43
|
+
"gemini-2.5-pro": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
|
|
44
|
+
"gemini-2.5-flash": { "input": 0.3, "cached_input": 0.03, "output": 2.5 },
|
|
45
|
+
"gemini-2.5-flash-lite": { "input": 0.1, "cached_input": 0.01, "output": 0.4 },
|
|
46
|
+
"gemini-2.0-flash": { "input": 0.1, "cached_input": 0.025, "output": 0.4 },
|
|
47
|
+
"gemini-2.0-flash-lite": { "input": 0.075, "output": 0.3 },
|
|
48
|
+
"gemini-1.5-pro": { "input": 1.25, "output": 5.0 },
|
|
49
|
+
"gemini-1.5-flash": { "input": 0.075, "output": 0.3 }
|
|
50
|
+
}
|
|
51
|
+
}
|
|
@@ -2,52 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
module LlmCostTracker
|
|
4
4
|
# Prices per 1M tokens in USD.
|
|
5
|
-
# Updated: April 2026. Override via configuration.
|
|
6
5
|
module Pricing
|
|
7
|
-
PRICES = {
|
|
8
|
-
# OpenAI
|
|
9
|
-
"gpt-5.2" => { input: 1.75, cached_input: 0.175, output: 14.00 },
|
|
10
|
-
"gpt-5.1" => { input: 1.25, cached_input: 0.125, output: 10.00 },
|
|
11
|
-
"gpt-5" => { input: 1.25, cached_input: 0.125, output: 10.00 },
|
|
12
|
-
"gpt-5-mini" => { input: 0.25, cached_input: 0.025, output: 2.00 },
|
|
13
|
-
"gpt-5-nano" => { input: 0.05, cached_input: 0.005, output: 0.40 },
|
|
14
|
-
"gpt-4.1" => { input: 2.00, cached_input: 0.50, output: 8.00 },
|
|
15
|
-
"gpt-4.1-mini" => { input: 0.40, cached_input: 0.10, output: 1.60 },
|
|
16
|
-
"gpt-4.1-nano" => { input: 0.10, cached_input: 0.025, output: 0.40 },
|
|
17
|
-
"gpt-4o-2024-05-13" => { input: 5.00, output: 15.00 },
|
|
18
|
-
"gpt-4o" => { input: 2.50, cached_input: 1.25, output: 10.00 },
|
|
19
|
-
"gpt-4o-mini" => { input: 0.15, cached_input: 0.075, output: 0.60 },
|
|
20
|
-
"gpt-4-turbo" => { input: 10.00, output: 30.00 },
|
|
21
|
-
"gpt-4" => { input: 30.00, output: 60.00 },
|
|
22
|
-
"gpt-3.5-turbo" => { input: 0.50, output: 1.50 },
|
|
23
|
-
"o1" => { input: 15.00, cached_input: 7.50, output: 60.00 },
|
|
24
|
-
"o1-mini" => { input: 1.10, cached_input: 0.55, output: 4.40 },
|
|
25
|
-
"o3" => { input: 2.00, cached_input: 0.50, output: 8.00 },
|
|
26
|
-
"o3-mini" => { input: 1.10, cached_input: 0.55, output: 4.40 },
|
|
27
|
-
"o4-mini" => { input: 1.10, cached_input: 0.275, output: 4.40 },
|
|
28
|
-
|
|
29
|
-
# Anthropic
|
|
30
|
-
"claude-sonnet-4-6" => { input: 3.00, output: 15.00, cache_read_input: 0.30, cache_creation_input: 3.75 },
|
|
31
|
-
"claude-opus-4-6" => { input: 5.00, output: 25.00, cache_read_input: 0.50, cache_creation_input: 6.25 },
|
|
32
|
-
"claude-opus-4-1" => { input: 15.00, output: 75.00, cache_read_input: 1.50, cache_creation_input: 18.75 },
|
|
33
|
-
"claude-opus-4" => { input: 15.00, output: 75.00, cache_read_input: 1.50, cache_creation_input: 18.75 },
|
|
34
|
-
"claude-sonnet-4-5" => { input: 3.00, output: 15.00, cache_read_input: 0.30, cache_creation_input: 3.75 },
|
|
35
|
-
"claude-sonnet-4" => { input: 3.00, output: 15.00, cache_read_input: 0.30, cache_creation_input: 3.75 },
|
|
36
|
-
"claude-haiku-4-5" => { input: 1.00, output: 5.00, cache_read_input: 0.10, cache_creation_input: 1.25 },
|
|
37
|
-
"claude-3-7-sonnet" => { input: 3.00, output: 15.00, cache_read_input: 0.30, cache_creation_input: 3.75 },
|
|
38
|
-
"claude-3-5-sonnet" => { input: 3.00, output: 15.00, cache_read_input: 0.30, cache_creation_input: 3.75 },
|
|
39
|
-
"claude-3-5-haiku" => { input: 0.80, output: 4.00, cache_read_input: 0.08, cache_creation_input: 1.00 },
|
|
40
|
-
"claude-3-opus" => { input: 15.00, output: 75.00, cache_read_input: 1.50, cache_creation_input: 18.75 },
|
|
41
|
-
|
|
42
|
-
# Google Gemini
|
|
43
|
-
"gemini-2.5-pro" => { input: 1.25, cached_input: 0.125, output: 10.00 },
|
|
44
|
-
"gemini-2.5-flash" => { input: 0.30, cached_input: 0.03, output: 2.50 },
|
|
45
|
-
"gemini-2.5-flash-lite" => { input: 0.10, cached_input: 0.01, output: 0.40 },
|
|
46
|
-
"gemini-2.0-flash" => { input: 0.10, cached_input: 0.025, output: 0.40 },
|
|
47
|
-
"gemini-2.0-flash-lite" => { input: 0.075, output: 0.30 },
|
|
48
|
-
"gemini-1.5-pro" => { input: 1.25, output: 5.00 },
|
|
49
|
-
"gemini-1.5-flash" => { input: 0.075, output: 0.30 }
|
|
50
|
-
}.freeze
|
|
6
|
+
PRICES = PriceRegistry.builtin_prices
|
|
51
7
|
|
|
52
8
|
class << self
|
|
53
9
|
def cost_for(model:, input_tokens:, output_tokens:, cached_input_tokens: 0,
|
|
@@ -55,54 +11,98 @@ module LlmCostTracker
|
|
|
55
11
|
prices = lookup(model)
|
|
56
12
|
return nil unless prices
|
|
57
13
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
uncached_input_tokens = [input_tokens.to_i - cached_input_tokens, 0].max
|
|
62
|
-
|
|
63
|
-
input_cost = (uncached_input_tokens.to_f / 1_000_000) * prices[:input]
|
|
64
|
-
cached_input_cost = (cached_input_tokens.to_f / 1_000_000) *
|
|
65
|
-
(prices[:cached_input] || prices[:input])
|
|
66
|
-
cache_read_input_cost = (cache_read_input_tokens.to_f / 1_000_000) *
|
|
67
|
-
(prices[:cache_read_input] || prices[:cached_input] || prices[:input])
|
|
68
|
-
cache_creation_input_cost = (cache_creation_input_tokens.to_f / 1_000_000) *
|
|
69
|
-
(prices[:cache_creation_input] || prices[:input])
|
|
70
|
-
output_cost = (output_tokens.to_f / 1_000_000) * prices[:output]
|
|
71
|
-
total_cost = input_cost + cached_input_cost + cache_read_input_cost +
|
|
72
|
-
cache_creation_input_cost + output_cost
|
|
14
|
+
token_counts = normalized_token_counts(input_tokens, output_tokens, cached_input_tokens,
|
|
15
|
+
cache_read_input_tokens, cache_creation_input_tokens)
|
|
16
|
+
costs = calculate_costs(token_counts, prices)
|
|
73
17
|
|
|
74
18
|
{
|
|
75
|
-
input_cost:
|
|
76
|
-
cached_input_cost:
|
|
77
|
-
cache_read_input_cost:
|
|
78
|
-
cache_creation_input_cost:
|
|
79
|
-
output_cost:
|
|
80
|
-
total_cost:
|
|
19
|
+
input_cost: costs[:input].round(8),
|
|
20
|
+
cached_input_cost: costs[:cached_input].round(8),
|
|
21
|
+
cache_read_input_cost: costs[:cache_read_input].round(8),
|
|
22
|
+
cache_creation_input_cost: costs[:cache_creation_input].round(8),
|
|
23
|
+
output_cost: costs[:output].round(8),
|
|
24
|
+
total_cost: costs.values.sum.round(8),
|
|
81
25
|
currency: "USD"
|
|
82
26
|
}
|
|
83
27
|
end
|
|
84
28
|
|
|
85
29
|
def lookup(model)
|
|
86
|
-
|
|
87
|
-
|
|
30
|
+
table = prices
|
|
31
|
+
model_name = model.to_s
|
|
32
|
+
normalized_model = normalize_model_name(model_name)
|
|
33
|
+
|
|
34
|
+
table[model_name] || table[normalized_model] || fuzzy_match(model_name, normalized_model, table)
|
|
88
35
|
end
|
|
89
36
|
|
|
90
37
|
def models
|
|
91
|
-
|
|
38
|
+
prices.keys
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def metadata
|
|
42
|
+
PriceRegistry.metadata
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def prices
|
|
46
|
+
PRICES
|
|
47
|
+
.merge(PriceRegistry.file_prices(LlmCostTracker.configuration.prices_file))
|
|
48
|
+
.merge(PriceRegistry.normalize_price_table(LlmCostTracker.configuration.pricing_overrides))
|
|
92
49
|
end
|
|
93
50
|
|
|
94
51
|
private
|
|
95
52
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
53
|
+
def normalized_token_counts(input_tokens, output_tokens, cached_input_tokens,
|
|
54
|
+
cache_read_input_tokens, cache_creation_input_tokens)
|
|
55
|
+
cached_input_tokens = cached_input_tokens.to_i
|
|
56
|
+
|
|
57
|
+
{
|
|
58
|
+
input: [input_tokens.to_i - cached_input_tokens, 0].max,
|
|
59
|
+
cached_input: cached_input_tokens,
|
|
60
|
+
cache_read_input: cache_read_input_tokens.to_i,
|
|
61
|
+
cache_creation_input: cache_creation_input_tokens.to_i,
|
|
62
|
+
output: output_tokens.to_i
|
|
63
|
+
}
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def calculate_costs(token_counts, prices)
|
|
67
|
+
{
|
|
68
|
+
input: token_cost(token_counts[:input], prices[:input]),
|
|
69
|
+
cached_input: token_cost(token_counts[:cached_input], prices[:cached_input] || prices[:input]),
|
|
70
|
+
cache_read_input: token_cost(
|
|
71
|
+
token_counts[:cache_read_input],
|
|
72
|
+
prices[:cache_read_input] || prices[:cached_input] || prices[:input]
|
|
73
|
+
),
|
|
74
|
+
cache_creation_input: token_cost(
|
|
75
|
+
token_counts[:cache_creation_input],
|
|
76
|
+
prices[:cache_creation_input] || prices[:input]
|
|
77
|
+
),
|
|
78
|
+
output: token_cost(token_counts[:output], prices[:output])
|
|
79
|
+
}
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def token_cost(tokens, per_million_price)
|
|
83
|
+
(tokens.to_f / 1_000_000) * per_million_price
|
|
84
|
+
end
|
|
99
85
|
|
|
100
|
-
|
|
101
|
-
|
|
86
|
+
def normalize_model_name(model)
|
|
87
|
+
model.to_s.split("/").last
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# Try to match model names like "gpt-4o-2024-08-06" to "gpt-4o".
|
|
91
|
+
def fuzzy_match(model, normalized_model, table)
|
|
92
|
+
sorted_price_keys(table).each do |key|
|
|
93
|
+
return table[key] if model.start_with?(key) || normalized_model.start_with?(key)
|
|
102
94
|
end
|
|
103
95
|
|
|
104
96
|
nil
|
|
105
97
|
end
|
|
98
|
+
|
|
99
|
+
def sorted_price_keys(table)
|
|
100
|
+
cache_key = table.keys
|
|
101
|
+
return @sorted_price_keys if @sorted_price_keys_cache_key == cache_key
|
|
102
|
+
|
|
103
|
+
@sorted_price_keys_cache_key = cache_key
|
|
104
|
+
@sorted_price_keys = cache_key.sort_by { |key| -key.length }
|
|
105
|
+
end
|
|
106
106
|
end
|
|
107
107
|
end
|
|
108
108
|
end
|
|
@@ -3,7 +3,10 @@
|
|
|
3
3
|
module LlmCostTracker
|
|
4
4
|
class Railtie < Rails::Railtie
|
|
5
5
|
generators do
|
|
6
|
+
require_relative "generators/llm_cost_tracker/add_latency_ms_generator"
|
|
6
7
|
require_relative "generators/llm_cost_tracker/install_generator"
|
|
8
|
+
require_relative "generators/llm_cost_tracker/upgrade_cost_precision_generator"
|
|
9
|
+
require_relative "generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator"
|
|
7
10
|
end
|
|
8
11
|
|
|
9
12
|
initializer "llm_cost_tracker.configure" do
|
|
@@ -5,7 +5,9 @@ module LlmCostTracker
|
|
|
5
5
|
class ActiveRecordStore
|
|
6
6
|
class << self
|
|
7
7
|
def save(event)
|
|
8
|
-
|
|
8
|
+
tags = stringify_tags(event[:tags] || {})
|
|
9
|
+
|
|
10
|
+
attributes = {
|
|
9
11
|
provider: event[:provider],
|
|
10
12
|
model: event[:model],
|
|
11
13
|
input_tokens: event[:input_tokens],
|
|
@@ -14,9 +16,12 @@ module LlmCostTracker
|
|
|
14
16
|
input_cost: event.dig(:cost, :input_cost),
|
|
15
17
|
output_cost: event.dig(:cost, :output_cost),
|
|
16
18
|
total_cost: event.dig(:cost, :total_cost),
|
|
17
|
-
tags:
|
|
19
|
+
tags: tags_for_storage(tags),
|
|
18
20
|
tracked_at: event[:tracked_at]
|
|
19
|
-
|
|
21
|
+
}
|
|
22
|
+
attributes[:latency_ms] = event[:latency_ms] if model_class.latency_column?
|
|
23
|
+
|
|
24
|
+
model_class.create!(attributes)
|
|
20
25
|
end
|
|
21
26
|
|
|
22
27
|
def monthly_total(time: Time.now.utc)
|
|
@@ -38,6 +43,10 @@ module LlmCostTracker
|
|
|
38
43
|
tags.transform_keys(&:to_s).transform_values { |value| stringify_tag_value(value) }
|
|
39
44
|
end
|
|
40
45
|
|
|
46
|
+
def tags_for_storage(tags)
|
|
47
|
+
model_class.tags_json_column? ? tags : tags.to_json
|
|
48
|
+
end
|
|
49
|
+
|
|
41
50
|
def stringify_tag_value(value)
|
|
42
51
|
return value.transform_values { |nested| stringify_tag_value(nested) } if value.is_a?(Hash)
|
|
43
52
|
|
|
@@ -5,8 +5,12 @@ module LlmCostTracker
|
|
|
5
5
|
EVENT_NAME = "llm_request.llm_cost_tracker"
|
|
6
6
|
|
|
7
7
|
class << self
|
|
8
|
-
def
|
|
9
|
-
|
|
8
|
+
def enforce_budget!
|
|
9
|
+
Budget.enforce!
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def record(provider:, model:, input_tokens:, output_tokens:, metadata: {}, latency_ms: nil)
|
|
13
|
+
usage = EventMetadata.usage_data(input_tokens, output_tokens, metadata)
|
|
10
14
|
|
|
11
15
|
cost_data = Pricing.cost_for(
|
|
12
16
|
model: model,
|
|
@@ -17,6 +21,8 @@ module LlmCostTracker
|
|
|
17
21
|
cache_creation_input_tokens: usage[:cache_creation_input_tokens]
|
|
18
22
|
)
|
|
19
23
|
|
|
24
|
+
UnknownPricing.handle!(model) unless cost_data
|
|
25
|
+
|
|
20
26
|
event = {
|
|
21
27
|
provider: provider,
|
|
22
28
|
model: model,
|
|
@@ -24,7 +30,8 @@ module LlmCostTracker
|
|
|
24
30
|
output_tokens: usage[:output_tokens],
|
|
25
31
|
total_tokens: usage[:total_tokens],
|
|
26
32
|
cost: cost_data,
|
|
27
|
-
tags: LlmCostTracker.configuration.default_tags.merge(metadata),
|
|
33
|
+
tags: LlmCostTracker.configuration.default_tags.merge(EventMetadata.tags(metadata)),
|
|
34
|
+
latency_ms: normalized_latency_ms(latency_ms),
|
|
28
35
|
tracked_at: Time.now.utc
|
|
29
36
|
}
|
|
30
37
|
|
|
@@ -32,10 +39,8 @@ module LlmCostTracker
|
|
|
32
39
|
ActiveSupport::Notifications.instrument(EVENT_NAME, event)
|
|
33
40
|
|
|
34
41
|
# Store based on backend
|
|
35
|
-
store(event)
|
|
36
|
-
|
|
37
|
-
# Budget check
|
|
38
|
-
check_budget(event)
|
|
42
|
+
stored = store(event)
|
|
43
|
+
Budget.check!(event) unless stored == false
|
|
39
44
|
|
|
40
45
|
event
|
|
41
46
|
end
|
|
@@ -53,6 +58,13 @@ module LlmCostTracker
|
|
|
53
58
|
when :custom
|
|
54
59
|
config.custom_storage&.call(event)
|
|
55
60
|
end
|
|
61
|
+
|
|
62
|
+
true
|
|
63
|
+
rescue BudgetExceededError, UnknownPricingError
|
|
64
|
+
raise
|
|
65
|
+
rescue StandardError => e
|
|
66
|
+
handle_storage_error(e)
|
|
67
|
+
false
|
|
56
68
|
end
|
|
57
69
|
|
|
58
70
|
def log_event(event)
|
|
@@ -61,6 +73,7 @@ module LlmCostTracker
|
|
|
61
73
|
message = "[LlmCostTracker] #{event[:provider]}/#{event[:model]} " \
|
|
62
74
|
"tokens=#{event[:input_tokens]}+#{event[:output_tokens]} " \
|
|
63
75
|
"cost=#{cost_str}"
|
|
76
|
+
message += " latency=#{event[:latency_ms]}ms" if event[:latency_ms]
|
|
64
77
|
message += " tags=#{event[:tags]}" unless event[:tags].empty?
|
|
65
78
|
|
|
66
79
|
case LlmCostTracker.configuration.log_level
|
|
@@ -76,6 +89,16 @@ module LlmCostTracker
|
|
|
76
89
|
warn(message) unless defined?(Rails)
|
|
77
90
|
end
|
|
78
91
|
|
|
92
|
+
def log_warning(message)
|
|
93
|
+
message = "[LlmCostTracker] #{message}"
|
|
94
|
+
|
|
95
|
+
if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
|
|
96
|
+
Rails.logger.warn(message)
|
|
97
|
+
else
|
|
98
|
+
warn message
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
|
|
79
102
|
def store_active_record(event)
|
|
80
103
|
require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
|
|
81
104
|
require_relative "storage/active_record_store" unless defined?(LlmCostTracker::Storage::ActiveRecordStore)
|
|
@@ -85,59 +108,31 @@ module LlmCostTracker
|
|
|
85
108
|
raise Error, "ActiveRecord storage requires the active_record gem: #{e.message}"
|
|
86
109
|
end
|
|
87
110
|
|
|
88
|
-
def
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
monthly_total: monthly_total,
|
|
98
|
-
budget: config.monthly_budget,
|
|
99
|
-
last_event: event
|
|
100
|
-
)
|
|
101
|
-
end
|
|
102
|
-
|
|
103
|
-
def calculate_monthly_total(latest_cost)
|
|
104
|
-
# For :active_record backend, query the DB
|
|
105
|
-
if LlmCostTracker.configuration.active_record? &&
|
|
106
|
-
defined?(LlmCostTracker::Storage::ActiveRecordStore)
|
|
107
|
-
LlmCostTracker::Storage::ActiveRecordStore.monthly_total
|
|
108
|
-
else
|
|
109
|
-
# For other backends, we can only report the latest cost
|
|
110
|
-
latest_cost
|
|
111
|
+
def handle_storage_error(error)
|
|
112
|
+
case storage_error_behavior
|
|
113
|
+
when :ignore
|
|
114
|
+
nil
|
|
115
|
+
when :warn
|
|
116
|
+
log_warning("Storage failed; tracking event was not persisted: #{error.class}: #{error.message}")
|
|
117
|
+
when :raise
|
|
118
|
+
storage_error = StorageError.new(error)
|
|
119
|
+
raise storage_error
|
|
111
120
|
end
|
|
112
121
|
end
|
|
113
122
|
|
|
114
|
-
def
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
cached_input_tokens = integer_metadata(metadata, :cached_input_tokens)
|
|
122
|
-
|
|
123
|
-
{
|
|
124
|
-
input_tokens: input_tokens.to_i,
|
|
125
|
-
output_tokens: output_tokens.to_i,
|
|
126
|
-
cached_input_tokens: cached_input_tokens,
|
|
127
|
-
cache_read_input_tokens: cache_read_input_tokens,
|
|
128
|
-
cache_creation_input_tokens: cache_creation_input_tokens,
|
|
129
|
-
total_tokens: input_tokens.to_i + output_tokens.to_i +
|
|
130
|
-
cache_read_input_tokens + cache_creation_input_tokens
|
|
131
|
-
}
|
|
123
|
+
def storage_error_behavior
|
|
124
|
+
behavior = (LlmCostTracker.configuration.storage_error_behavior || :warn).to_sym
|
|
125
|
+
return behavior if Configuration::STORAGE_ERROR_BEHAVIORS.include?(behavior)
|
|
126
|
+
|
|
127
|
+
raise Error,
|
|
128
|
+
"Unknown storage_error_behavior: #{behavior.inspect}. " \
|
|
129
|
+
"Use one of: #{Configuration::STORAGE_ERROR_BEHAVIORS.join(', ')}"
|
|
132
130
|
end
|
|
133
131
|
|
|
134
|
-
def
|
|
135
|
-
|
|
136
|
-
value = metadata[key] || metadata[key.to_s]
|
|
137
|
-
return value.to_i unless value.nil?
|
|
138
|
-
end
|
|
132
|
+
def normalized_latency_ms(latency_ms)
|
|
133
|
+
return nil if latency_ms.nil?
|
|
139
134
|
|
|
140
|
-
0
|
|
135
|
+
[latency_ms.to_i, 0].max
|
|
141
136
|
end
|
|
142
137
|
end
|
|
143
138
|
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
|
|
4
|
+
class UnknownPricing
|
|
5
|
+
class << self
|
|
6
|
+
def handle!(model)
|
|
7
|
+
model = normalized_model_name(model)
|
|
8
|
+
|
|
9
|
+
case behavior
|
|
10
|
+
when :ignore
|
|
11
|
+
nil
|
|
12
|
+
when :warn
|
|
13
|
+
warn_missing(model)
|
|
14
|
+
when :raise
|
|
15
|
+
raise UnknownPricingError.new(model: model)
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
private
|
|
20
|
+
|
|
21
|
+
def normalized_model_name(model)
|
|
22
|
+
model.to_s.empty? ? "unknown" : model.to_s
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def warn_missing(model)
|
|
26
|
+
message = "[LlmCostTracker] No pricing configured for model #{model.inspect}. " \
|
|
27
|
+
"Cost and budget enforcement will be skipped for this event. " \
|
|
28
|
+
"Add a pricing_overrides entry or set unknown_pricing_behavior."
|
|
29
|
+
|
|
30
|
+
if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
|
|
31
|
+
Rails.logger.warn(message)
|
|
32
|
+
else
|
|
33
|
+
Kernel.warn(message)
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def behavior
|
|
38
|
+
behavior = (LlmCostTracker.configuration.unknown_pricing_behavior || :warn).to_sym
|
|
39
|
+
return behavior if Configuration::UNKNOWN_PRICING_BEHAVIORS.include?(behavior)
|
|
40
|
+
|
|
41
|
+
raise Error,
|
|
42
|
+
"Unknown unknown_pricing_behavior: #{behavior.inspect}. " \
|
|
43
|
+
"Use one of: #{Configuration::UNKNOWN_PRICING_BEHAVIORS.join(', ')}"
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
data/lib/llm_cost_tracker.rb
CHANGED
|
@@ -5,31 +5,39 @@ require "active_support/notifications"
|
|
|
5
5
|
|
|
6
6
|
require_relative "llm_cost_tracker/version"
|
|
7
7
|
require_relative "llm_cost_tracker/configuration"
|
|
8
|
+
require_relative "llm_cost_tracker/errors"
|
|
9
|
+
require_relative "llm_cost_tracker/price_registry"
|
|
8
10
|
require_relative "llm_cost_tracker/pricing"
|
|
9
11
|
require_relative "llm_cost_tracker/parsers/base"
|
|
10
12
|
require_relative "llm_cost_tracker/parsers/openai"
|
|
13
|
+
require_relative "llm_cost_tracker/parsers/openai_compatible"
|
|
11
14
|
require_relative "llm_cost_tracker/parsers/anthropic"
|
|
12
15
|
require_relative "llm_cost_tracker/parsers/gemini"
|
|
13
16
|
require_relative "llm_cost_tracker/parsers/registry"
|
|
14
17
|
require_relative "llm_cost_tracker/middleware/faraday"
|
|
18
|
+
require_relative "llm_cost_tracker/budget"
|
|
19
|
+
require_relative "llm_cost_tracker/unknown_pricing"
|
|
20
|
+
require_relative "llm_cost_tracker/event_metadata"
|
|
15
21
|
require_relative "llm_cost_tracker/tracker"
|
|
16
22
|
|
|
17
23
|
module LlmCostTracker
|
|
18
|
-
class Error < StandardError; end
|
|
19
|
-
|
|
20
24
|
class << self
|
|
25
|
+
CONFIGURATION_MUTEX = Mutex.new
|
|
26
|
+
|
|
21
27
|
attr_writer :configuration
|
|
22
28
|
|
|
23
29
|
def configuration
|
|
24
|
-
@configuration ||= Configuration.new
|
|
30
|
+
@configuration || CONFIGURATION_MUTEX.synchronize { @configuration ||= Configuration.new }
|
|
25
31
|
end
|
|
26
32
|
|
|
27
33
|
def configure
|
|
28
34
|
yield(configuration)
|
|
35
|
+
configuration.normalize_openai_compatible_providers!
|
|
36
|
+
warn_for_configuration!
|
|
29
37
|
end
|
|
30
38
|
|
|
31
39
|
def reset_configuration!
|
|
32
|
-
@configuration = Configuration.new
|
|
40
|
+
CONFIGURATION_MUTEX.synchronize { @configuration = Configuration.new }
|
|
33
41
|
end
|
|
34
42
|
|
|
35
43
|
# Manual tracking for non-Faraday clients
|
|
@@ -42,15 +50,35 @@ module LlmCostTracker
|
|
|
42
50
|
# feature: "chat",
|
|
43
51
|
# user_id: current_user.id
|
|
44
52
|
# )
|
|
45
|
-
def track(provider:, model:, input_tokens:, output_tokens:, **metadata)
|
|
53
|
+
def track(provider:, model:, input_tokens:, output_tokens:, latency_ms: nil, **metadata)
|
|
46
54
|
Tracker.record(
|
|
47
55
|
provider: provider.to_s,
|
|
48
56
|
model: model,
|
|
49
57
|
input_tokens: input_tokens,
|
|
50
58
|
output_tokens: output_tokens,
|
|
59
|
+
latency_ms: latency_ms,
|
|
51
60
|
metadata: metadata
|
|
52
61
|
)
|
|
53
62
|
end
|
|
63
|
+
|
|
64
|
+
private
|
|
65
|
+
|
|
66
|
+
def warn_for_configuration!
|
|
67
|
+
return unless (configuration.budget_exceeded_behavior || :notify).to_sym == :block_requests
|
|
68
|
+
return if configuration.active_record?
|
|
69
|
+
|
|
70
|
+
log_warning(":block_requests requires storage_backend = :active_record; preflight blocking will be skipped.")
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def log_warning(message)
|
|
74
|
+
message = "[LlmCostTracker] #{message}"
|
|
75
|
+
|
|
76
|
+
if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
|
|
77
|
+
Rails.logger.warn(message)
|
|
78
|
+
else
|
|
79
|
+
warn message
|
|
80
|
+
end
|
|
81
|
+
end
|
|
54
82
|
end
|
|
55
83
|
end
|
|
56
84
|
|
data/llm_cost_tracker.gemspec
CHANGED
|
@@ -8,10 +8,11 @@ Gem::Specification.new do |spec|
|
|
|
8
8
|
spec.authors = ["Sergii Khomenko"]
|
|
9
9
|
spec.email = ["sergey@mm.st"]
|
|
10
10
|
|
|
11
|
-
spec.summary = "Self-hosted LLM API cost
|
|
12
|
-
spec.description = "Tracks token usage and estimated costs for OpenAI, Anthropic,
|
|
11
|
+
spec.summary = "Self-hosted LLM API cost guardrails for Ruby and Rails"
|
|
12
|
+
spec.description = "Tracks token usage and estimated costs for OpenAI, Anthropic, Google Gemini, " \
|
|
13
|
+
"OpenRouter, DeepSeek, and OpenAI-compatible calls. " \
|
|
13
14
|
"Works as Faraday middleware for Ruby clients, with ActiveRecord storage, " \
|
|
14
|
-
"per-user/per-feature attribution, and budget
|
|
15
|
+
"per-user/per-feature attribution, budget alerts, and budget enforcement."
|
|
15
16
|
spec.homepage = "https://github.com/sergey-homenko/llm_cost_tracker"
|
|
16
17
|
spec.license = "MIT"
|
|
17
18
|
|