llm_cost_tracker 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +69 -0
- data/README.md +333 -30
- data/lib/llm_cost_tracker/budget.rb +85 -0
- data/lib/llm_cost_tracker/configuration.rb +82 -3
- data/lib/llm_cost_tracker/cost.rb +15 -0
- data/lib/llm_cost_tracker/errors.rb +37 -0
- data/lib/llm_cost_tracker/event.rb +24 -0
- data/lib/llm_cost_tracker/event_metadata.rb +54 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +20 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +9 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +16 -4
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +14 -1
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +36 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +15 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +41 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +29 -0
- data/lib/llm_cost_tracker/llm_api_call.rb +45 -14
- data/lib/llm_cost_tracker/logging.rb +44 -0
- data/lib/llm_cost_tracker/middleware/faraday.rb +54 -13
- data/lib/llm_cost_tracker/parsed_usage.rb +45 -0
- data/lib/llm_cost_tracker/parsers/anthropic.rb +6 -4
- data/lib/llm_cost_tracker/parsers/base.rb +2 -0
- data/lib/llm_cost_tracker/parsers/gemini.rb +12 -5
- data/lib/llm_cost_tracker/parsers/openai.rb +11 -22
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +48 -0
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +33 -0
- data/lib/llm_cost_tracker/parsers/registry.rb +16 -7
- data/lib/llm_cost_tracker/price_registry.rb +99 -0
- data/lib/llm_cost_tracker/prices.json +51 -0
- data/lib/llm_cost_tracker/pricing.rb +103 -77
- data/lib/llm_cost_tracker/railtie.rb +8 -0
- data/lib/llm_cost_tracker/report.rb +29 -0
- data/lib/llm_cost_tracker/report_data.rb +84 -0
- data/lib/llm_cost_tracker/report_formatter.rb +59 -0
- data/lib/llm_cost_tracker/storage/active_record_backend.rb +19 -0
- data/lib/llm_cost_tracker/storage/active_record_store.rb +21 -12
- data/lib/llm_cost_tracker/storage/backends.rb +26 -0
- data/lib/llm_cost_tracker/storage/custom_backend.rb +16 -0
- data/lib/llm_cost_tracker/storage/log_backend.rb +28 -0
- data/lib/llm_cost_tracker/tag_accessors.rb +23 -0
- data/lib/llm_cost_tracker/tag_query.rb +38 -0
- data/lib/llm_cost_tracker/tags_column.rb +16 -0
- data/lib/llm_cost_tracker/tracker.rb +43 -97
- data/lib/llm_cost_tracker/unknown_pricing.rb +40 -0
- data/lib/llm_cost_tracker/value_object.rb +45 -0
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +49 -6
- data/lib/tasks/llm_cost_tracker.rake +9 -0
- data/llm_cost_tracker.gemspec +4 -3
- metadata +39 -6
|
@@ -1,108 +1,134 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module LlmCostTracker
  # Calculates costs from price entries expressed in USD per 1M tokens.
  #
  # The effective price table is the built-in table, overlaid with the entries
  # from the configured prices file, overlaid with `pricing_overrides`.
  module Pricing
    PRICES = PriceRegistry.builtin_prices
    PRICES_MUTEX = Mutex.new
    SORTED_PRICE_KEYS_MUTEX = Mutex.new

    private_constant :PRICES_MUTEX
    private_constant :SORTED_PRICE_KEYS_MUTEX

    class << self
      # Estimate model cost from token counts.
      #
      # @param model [String] Provider model identifier.
      # @param input_tokens [Integer] Input token count, including cached tokens if reported that way.
      # @param output_tokens [Integer] Output token count.
      # @param cached_input_tokens [Integer] OpenAI-style cached input tokens.
      # @param cache_read_input_tokens [Integer] Anthropic-style cache read tokens.
      # @param cache_creation_input_tokens [Integer] Anthropic-style cache creation tokens.
      # @return [LlmCostTracker::Cost, nil] nil when no price is configured for the model.
      def cost_for(model:, input_tokens:, output_tokens:, cached_input_tokens: 0,
                   cache_read_input_tokens: 0, cache_creation_input_tokens: 0)
        prices = lookup(model)
        return nil unless prices

        token_counts = normalized_token_counts(input_tokens, output_tokens, cached_input_tokens,
                                               cache_read_input_tokens, cache_creation_input_tokens)
        costs = calculate_costs(token_counts, prices)

        Cost.new(
          input_cost: costs[:input].round(8),
          cached_input_cost: costs[:cached_input].round(8),
          cache_read_input_cost: costs[:cache_read_input].round(8),
          cache_creation_input_cost: costs[:cache_creation_input].round(8),
          output_cost: costs[:output].round(8),
          total_cost: costs.values.sum.round(8),
          currency: "USD"
        )
      end

      # Find the price entry for a model.
      #
      # Tries the name as given, then the name with any "provider/" prefix
      # removed, then the longest table key that prefixes either form.
      #
      # @param model [#to_s] Model identifier.
      # @return [Hash, nil] Price entry, or nil when nothing matches.
      def lookup(model)
        table = prices
        model_name = model.to_s
        normalized_model = normalize_model_name(model_name)

        table[model_name] || table[normalized_model] || fuzzy_match(model_name, normalized_model, table)
      end

      # @return [Array] Keys of the effective price table.
      def models
        prices.keys
      end

      # @return [Object] Whatever PriceRegistry.metadata reports.
      def metadata
        PriceRegistry.metadata
      end

      # Effective, frozen price table.
      #
      # The merged table is cached and rebuilt only when the file price table
      # object or the hash of `pricing_overrides` changes.
      #
      # @return [Hash] Frozen merged price table.
      def prices
        file_prices = PriceRegistry.file_prices(LlmCostTracker.configuration.prices_file)
        raw_overrides = LlmCostTracker.configuration.pricing_overrides
        cache_key = [file_prices.object_id, raw_overrides.hash]

        return @prices if @prices_cache_key == cache_key

        PRICES_MUTEX.synchronize do
          return @prices if @prices_cache_key == cache_key

          # Overrides are only needed when the table is rebuilt.
          overrides = PriceRegistry.normalize_price_table(raw_overrides)

          # Publish the table BEFORE its key: the unsynchronized fast path above
          # returns @prices as soon as the key matches, so writing the key first
          # would let another thread observe a nil or stale table.
          @prices = PRICES.merge(file_prices).merge(overrides).freeze
          @prices_cache_key = cache_key
          @prices
        end
      end

      private

      # Coerce every count to Integer and split plain input from cached input.
      # The uncached input count is clamped at zero.
      def normalized_token_counts(input_tokens, output_tokens, cached_input_tokens,
                                  cache_read_input_tokens, cache_creation_input_tokens)
        cached_input_tokens = cached_input_tokens.to_i

        {
          input: [input_tokens.to_i - cached_input_tokens, 0].max,
          cached_input: cached_input_tokens,
          cache_read_input: cache_read_input_tokens.to_i,
          cache_creation_input: cache_creation_input_tokens.to_i,
          output: output_tokens.to_i
        }
      end

      # Price each token bucket. Cache buckets fall back to less specific
      # prices (ultimately :input) when the entry has no dedicated price.
      def calculate_costs(token_counts, prices)
        {
          input: token_cost(token_counts[:input], prices[:input]),
          cached_input: token_cost(token_counts[:cached_input], prices[:cached_input] || prices[:input]),
          cache_read_input: token_cost(
            token_counts[:cache_read_input],
            prices[:cache_read_input] || prices[:cached_input] || prices[:input]
          ),
          cache_creation_input: token_cost(
            token_counts[:cache_creation_input],
            prices[:cache_creation_input] || prices[:input]
          ),
          output: token_cost(token_counts[:output], prices[:output])
        }
      end

      # Cost of `tokens` at a price quoted per one million tokens.
      def token_cost(tokens, per_million_price)
        (tokens.to_f / 1_000_000) * per_million_price
      end

      # Drop everything up to and including the last "/" in the model name.
      def normalize_model_name(model)
        model.to_s.split("/").last
      end

      # Try to match model names like "gpt-4o-2024-08-06" to "gpt-4o".
      # Keys are tried longest first so the most specific prefix wins.
      def fuzzy_match(model, normalized_model, table)
        sorted_price_keys(table).each do |key|
          return table[key] if model.start_with?(key) || normalized_model.start_with?(key)
        end

        nil
      end

      # Keys of `table` ordered by descending length, cached per table object.
      def sorted_price_keys(table)
        return @sorted_price_keys if @sorted_price_keys_table.equal?(table)

        SORTED_PRICE_KEYS_MUTEX.synchronize do
          return @sorted_price_keys if @sorted_price_keys_table.equal?(table)

          # Same publication order as in .prices: value first, identity marker
          # second, so the lock-free check never pairs a new table with old keys.
          @sorted_price_keys = table.keys.sort_by { |key| -key.length }
          @sorted_price_keys_table = table
          @sorted_price_keys
        end
      end
    end
  end
end
|
|
@@ -3,7 +3,15 @@
|
|
|
3
3
|
module LlmCostTracker
|
|
4
4
|
class Railtie < Rails::Railtie
|
|
5
5
|
generators do
|
|
6
|
+
require_relative "generators/llm_cost_tracker/add_latency_ms_generator"
|
|
6
7
|
require_relative "generators/llm_cost_tracker/install_generator"
|
|
8
|
+
require_relative "generators/llm_cost_tracker/prices_generator"
|
|
9
|
+
require_relative "generators/llm_cost_tracker/upgrade_cost_precision_generator"
|
|
10
|
+
require_relative "generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator"
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
rake_tasks do
|
|
14
|
+
load File.expand_path("../tasks/llm_cost_tracker.rake", __dir__)
|
|
7
15
|
end
|
|
8
16
|
|
|
9
17
|
initializer "llm_cost_tracker.configure" do
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "report_data"
|
|
4
|
+
require_relative "report_formatter"
|
|
5
|
+
|
|
6
|
+
module LlmCostTracker
  # Facade that builds report data and renders it as plain text.
  class Report
    DEFAULT_DAYS = ReportData::DEFAULT_DAYS

    # Build the structured report data.
    #
    # @param days [Integer] Number of trailing days to include.
    # @param now [Time] Report end time.
    # @return [ReportData]
    def self.data(days: DEFAULT_DAYS, now: Time.now.utc)
      ReportData.build(days: days, now: now)
    end

    # Render a terminal-friendly cost report from ActiveRecord storage.
    #
    # Failures are reported in the returned string instead of being raised.
    #
    # @param days [Integer] Number of trailing days to include.
    # @param now [Time] Report end time.
    # @return [String]
    def self.generate(days: DEFAULT_DAYS, now: Time.now.utc)
      report_data = data(days: days, now: now)
      ReportFormatter.new(report_data).to_s
    rescue LoadError => load_error
      "Unable to build LLM cost report: ActiveRecord storage is unavailable (#{load_error.message})"
    rescue StandardError => error
      "Unable to build LLM cost report: #{error.class}: #{error.message}"
    end
  end
end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "value_object"
|
|
4
|
+
|
|
5
|
+
module LlmCostTracker
  # One row of the "top expensive calls" list.
  TopCall = ValueObject.define(:provider, :model, :total_cost)

  # Aggregated figures for one reporting window.
  ReportData = ValueObject.define(
    :days,
    :from_time,
    :to_time,
    :total_cost,
    :requests_count,
    :average_latency_ms,
    :unknown_pricing_count,
    :cost_by_provider,
    :cost_by_model,
    :cost_by_feature,
    :top_calls
  )

  ReportData.const_set(:DEFAULT_DAYS, 30)
  ReportData.const_set(:TOP_LIMIT, 5)

  class << ReportData
    # Query LlmApiCall for the trailing window and aggregate the results.
    #
    # @param days [Integer] Window length in days; non-positive values fall back to DEFAULT_DAYS.
    # @param now [Time] End of the window.
    # @return [ReportData]
    def build(days: ReportData::DEFAULT_DAYS, now: Time.now.utc)
      require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)

      window_days = normalized_days(days)
      window_start = from_time(window_days, now)
      calls = LlmApiCall.where(tracked_at: window_start..now)

      new(
        days: window_days,
        from_time: window_start,
        to_time: now,
        total_cost: calls.sum(:total_cost).to_f,
        requests_count: calls.count,
        average_latency_ms: average_latency_ms(calls),
        unknown_pricing_count: calls.where(total_cost: nil).count,
        cost_by_provider: cost_by(calls, :provider),
        cost_by_model: cost_by(calls, :model),
        cost_by_feature: cost_by_feature(calls),
        top_calls: top_calls(calls)
      )
    end

    private

    # Integer day count, replaced by the default when it is not positive.
    def normalized_days(days)
      count = days.to_i
      return ReportData::DEFAULT_DAYS unless count.positive?

      count
    end

    # Start of the window: `days` * 86,400 seconds before `now`.
    def from_time(days, now)
      seconds = days * 86_400
      now - seconds
    end

    # Mean latency as a Float, or nil when the latency column is absent
    # or the average itself is nil.
    def average_latency_ms(scope)
      return nil unless LlmApiCall.latency_column?

      mean = scope.average(:latency_ms)
      mean&.to_f
    end

    # [name, cost] pairs summed per value of `column`, most expensive first.
    def cost_by(scope, column)
      totals = scope.group(column).sum(:total_cost)
      totals.transform_values(&:to_f).sort_by { |_key, amount| -amount }
    end

    # [feature, cost] pairs, most expensive first; calls whose `feature`
    # is nil are grouped under "(untagged)".
    def cost_by_feature(scope)
      totals = Hash.new(0.0)
      scope.select(:id, :tags, :total_cost).find_each do |call|
        label = call.feature || "(untagged)"
        totals[label] += call.total_cost.to_f
      end
      totals.sort_by { |_label, amount| -amount }
    end

    # The TOP_LIMIT most expensive calls that have a cost, as TopCall objects.
    def top_calls(scope)
      priced = scope.where.not(total_cost: nil)
      ranked = priced.order(total_cost: :desc).limit(ReportData::TOP_LIMIT)
      ranked.map do |call|
        TopCall.new(provider: call.provider, model: call.model, total_cost: call.total_cost.to_f)
      end
    end
  end
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  # Renders a report data object as newline-joined plain text.
  class ReportFormatter
    # Maximum rows printed per section.
    TOP_LIMIT = 5

    # @param data [#days, #total_cost, #requests_count, #average_latency_ms,
    #   #unknown_pricing_count, #cost_by_provider, #cost_by_model,
    #   #cost_by_feature, #top_calls] Report figures to render.
    def initialize(data)
      @data = data
    end

    # @return [String] The full report, one line per entry, joined with "\n".
    def to_s
      [
        "LLM Cost Report (last #{@data.days} days)",
        "",
        *summary_lines,
        *cost_section_lines("By provider", @data.cost_by_provider),
        *cost_section_lines("By model", @data.cost_by_model),
        *cost_section_lines("By feature", @data.cost_by_feature),
        *top_call_lines
      ].join("\n")
    end

    private

    # Headline totals.
    def summary_lines
      [
        "Total cost: #{money(@data.total_cost)}",
        "Requests: #{@data.requests_count}",
        "Avg latency: #{average_latency}",
        "Unknown pricing: #{@data.unknown_pricing_count}"
      ]
    end

    # Blank line, heading, then up to TOP_LIMIT [name, cost] rows (or " none").
    def cost_section_lines(title, rows)
      heading = ["", "#{title}:"]
      return heading + [" none"] if rows.empty?

      heading + rows.first(TOP_LIMIT).map do |name, cost|
        " #{name.to_s.ljust(28)} #{money(cost)}"
      end
    end

    # Blank line, heading, then up to TOP_LIMIT "provider/model" rows (or " none").
    def top_call_lines
      heading = ["", "Top expensive calls:"]
      calls = @data.top_calls
      return heading + [" none"] if calls.empty?

      heading + calls.first(TOP_LIMIT).map do |call|
        label = "#{call.provider}/#{call.model}"
        " #{label.ljust(32)} #{money(call.total_cost)}"
      end
    end

    # Rounded latency with an "ms" suffix, or "n/a" when none is available.
    def average_latency
      latency = @data.average_latency_ms
      return "n/a" unless latency

      "#{latency.round}ms"
    end

    # Dollar amount with six decimal places.
    def money(value)
      format("$%.6f", value.to_f)
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  module Storage
    # Storage backend that hands events to ActiveRecordStore.
    module ActiveRecordBackend
      # Persist an event through ActiveRecordStore.
      #
      # @param event [Object] Event to store.
      # @param _options [Hash] Accepted and ignored.
      # @return [Object] The event that was passed in.
      # @raise [Error] When a LoadError occurs while loading or saving.
      def self.save(event, **_options)
        load_store
        ActiveRecordStore.save(event)
        event
      rescue LoadError => load_error
        raise Error, "ActiveRecord storage requires the active_record gem: #{load_error.message}"
      end

      # Lazily require the model and store files unless their constants exist.
      def self.load_store
        require_relative "../llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
        require_relative "active_record_store" unless defined?(LlmCostTracker::Storage::ActiveRecordStore)
      end
      private_class_method :load_store
    end
  end
end
|
|
@@ -5,18 +5,23 @@ module LlmCostTracker
|
|
|
5
5
|
class ActiveRecordStore
|
|
6
6
|
class << self
|
|
7
7
|
# Create one record for the event.
#
# Cost columns are nil when the event has no cost. latency_ms is written
# only when the model reports that the column exists.
def save(event)
  string_tags = stringify_tags(event.tags || {})
  cost = event.cost

  record = {
    provider: event.provider,
    model: event.model,
    input_tokens: event.input_tokens,
    output_tokens: event.output_tokens,
    total_tokens: event.total_tokens,
    input_cost: cost&.input_cost,
    output_cost: cost&.output_cost,
    total_cost: cost&.total_cost,
    tags: tags_for_storage(string_tags),
    tracked_at: event.tracked_at
  }
  record[:latency_ms] = event.latency_ms if model_class.latency_column?

  model_class.create!(record)
end
|
|
21
26
|
|
|
22
27
|
def monthly_total(time: Time.now.utc)
|
|
@@ -38,6 +43,10 @@ module LlmCostTracker
|
|
|
38
43
|
tags.transform_keys(&:to_s).transform_values { |value| stringify_tag_value(value) }
|
|
39
44
|
end
|
|
40
45
|
|
|
46
|
+
# Pass tags through unchanged for JSON-typed columns; otherwise serialize
# them to a JSON string.
def tags_for_storage(tags)
  return tags if model_class.tags_json_column?

  tags.to_json
end
|
|
49
|
+
|
|
41
50
|
def stringify_tag_value(value)
|
|
42
51
|
return value.transform_values { |nested| stringify_tag_value(nested) } if value.is_a?(Hash)
|
|
43
52
|
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../errors"
|
|
4
|
+
require_relative "log_backend"
|
|
5
|
+
require_relative "active_record_backend"
|
|
6
|
+
require_relative "custom_backend"
|
|
7
|
+
|
|
8
|
+
module LlmCostTracker
  module Storage
    # Maps storage backend names to backend modules.
    module Backends
      MAP = {
        log: LogBackend,
        active_record: ActiveRecordBackend,
        custom: CustomBackend
      }.freeze

      class << self
        # Look up a backend module by name.
        #
        # @param name [Symbol, String, Object] Backend name.
        # @return [Module] The matching backend.
        # @raise [Error] When the name is not a key of MAP, including names
        #   such as nil that cannot be converted to a Symbol.
        def fetch(name)
          # nil, Integers, etc. do not respond to #to_sym; look them up as-is
          # so they fall through to the descriptive Error below instead of
          # surfacing as NoMethodError.
          key = name.respond_to?(:to_sym) ? name.to_sym : name
          MAP.fetch(key)
        rescue KeyError
          raise Error, "Unknown storage_backend: #{name.inspect}. Use one of: #{MAP.keys.join(', ')}"
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  module Storage
    # Storage backend that forwards events to `config.custom_storage`.
    module CustomBackend
      # Call the configured handler, if any, with the event.
      #
      # @param event [Object] Event to hand over.
      # @param config [#custom_storage] Object exposing the handler (or nil).
      # @return [Object, false] false when the handler returned exactly false,
      #   otherwise the event.
      def self.save(event, config:)
        handler = config.custom_storage
        outcome = handler.nil? ? nil : handler.call(event)

        outcome == false ? false : event
      end
    end
  end
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../logging"
|
|
4
|
+
|
|
5
|
+
module LlmCostTracker
  module Storage
    # Storage backend that writes a one-line summary of each event to the log.
    module LogBackend
      class << self
        # Log a summary line for the event at `config.log_level`.
        #
        # The line always contains provider/model, token counts and cost;
        # latency and tags are appended only when present.
        #
        # @param event [Object] Event exposing provider, model, input_tokens,
        #   output_tokens, cost, latency_ms and tags.
        # @param config [#log_level] Source of the log level.
        # @return [Object] The event that was passed in.
        def save(event, config:)
          parts = [
            "#{event.provider}/#{event.model}",
            "tokens=#{event.input_tokens}+#{event.output_tokens}",
            "cost=#{cost_label(event)}"
          ]
          parts << "latency=#{event.latency_ms}ms" if event.latency_ms

          # Tags may be nil; treat that the same as an empty collection rather
          # than calling #empty? on nil.
          tags = event.tags
          parts << "tags=#{tags}" unless tags.nil? || tags.empty?

          Logging.log(config.log_level, parts.join(" "))
          event
        end

        private

        # Six-decimal dollar amount, or "unknown" when the event has no cost.
        def cost_label(event)
          event.cost ? "$#{format('%.6f', event.cost.total_cost)}" : "unknown"
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module LlmCostTracker
  # Readers for tag data on objects that expose a `tags` method returning
  # either a Hash or a JSON string.
  module TagAccessors
    # Tags as a Hash with string keys.
    #
    # A Hash is returned with its keys stringified; anything else is parsed as
    # JSON (nil is treated as "{}"). Unparseable input, input that is not a
    # String, and JSON documents that are not objects all yield an empty Hash,
    # so callers can always index the result.
    #
    # @return [Hash]
    def parsed_tags
      return tags.transform_keys(&:to_s) if tags.is_a?(Hash)

      parsed = JSON.parse(tags || "{}")
      # Valid JSON such as "[]" or "null" is not a tag map.
      parsed.is_a?(Hash) ? parsed : {}
    rescue JSON::ParserError, TypeError
      # TypeError: `tags` was neither a Hash nor something JSON.parse accepts.
      {}
    end

    # @return [Object, nil] Value of the "feature" tag.
    def feature
      parsed_tags["feature"]
    end

    # @return [Object, nil] Value of the "user_id" tag.
    def user_id
      parsed_tags["user_id"]
    end
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module LlmCostTracker
  # Builds tag filters against the `tags` column of a model.
  module TagQuery
    # LIKE escape character. A backslash is avoided because `ESCAPE '\'` is an
    # unterminated string literal on databases that treat backslash as an
    # escape inside string literals (MySQL's default mode).
    LIKE_ESCAPE = "!"
    private_constant :LIKE_ESCAPE

    class << self
      # Restrict `model` to rows matching every given tag.
      #
      # @param model [Object] Object responding to all, where,
      #   tags_json_column? and sanitize_sql_like.
      # @param tags [Hash, nil] Tag names and values to match.
      # @return [Object] `model.all` when no tags are given, otherwise the
      #   filtered relation.
      def apply(model, tags)
        normalized_tags = normalize_tags(tags)
        return model.all if normalized_tags.empty?

        return json_query(model, normalized_tags) if model.tags_json_column?

        text_query(model, normalized_tags)
      end

      # @param tags [#to_h, nil]
      # @return [Hash{String => String}] Tags with keys and values stringified.
      def normalize_tags(tags)
        (tags || {}).to_h.transform_keys(&:to_s).transform_values(&:to_s)
      end

      private

      # Containment match for JSON-typed columns.
      def json_query(model, tags)
        model.where("tags @> ?::jsonb", tags.to_json)
      end

      # Substring match for text columns: one LIKE condition per tag, each
      # looking for the tag's serialized `"key":"value"` fragment.
      def text_query(model, tags)
        tags.reduce(model.all) do |relation, (key, value)|
          fragment = model.sanitize_sql_like(json_tag_fragment(key, value), LIKE_ESCAPE)
          relation.where("tags LIKE ? ESCAPE '#{LIKE_ESCAPE}'", "%#{fragment}%")
        end
      end

      # JSON for a single pair without the surrounding braces.
      def json_tag_fragment(key, value)
        JSON.generate(key => value).delete_prefix("{").delete_suffix("}")
      end
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  # Schema probes for objects that expose `columns_hash`.
  module TagsColumn
    # Whether the "tags" column is JSON-typed.
    #
    # @return [Boolean] true when the column type is :json or :jsonb, or its
    #   SQL type is "jsonb" (case-insensitive); false otherwise, including
    #   when there is no "tags" column.
    def tags_json_column?
      tags_column = columns_hash["tags"]
      return false unless tags_column
      return true if %i[json jsonb].include?(tags_column.type)

      tags_column.sql_type.to_s.downcase == "jsonb"
    end

    # @return [Boolean] Whether a "latency_ms" column is present.
    def latency_column?
      schema = columns_hash
      schema.key?("latency_ms")
    end
  end
end
|