llm_cost_tracker 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +44 -0
- data/CHANGELOG.md +62 -0
- data/README.md +243 -26
- data/Rakefile +3 -1
- data/lib/llm_cost_tracker/budget.rb +97 -0
- data/lib/llm_cost_tracker/configuration.rb +37 -0
- data/lib/llm_cost_tracker/errors.rb +37 -0
- data/lib/llm_cost_tracker/event_metadata.rb +54 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +9 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +16 -4
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +14 -1
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +15 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +41 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +29 -0
- data/lib/llm_cost_tracker/llm_api_call.rb +69 -1
- data/lib/llm_cost_tracker/middleware/faraday.rb +51 -14
- data/lib/llm_cost_tracker/parsers/anthropic.rb +10 -5
- data/lib/llm_cost_tracker/parsers/gemini.rb +13 -5
- data/lib/llm_cost_tracker/parsers/openai.rb +22 -7
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +44 -0
- data/lib/llm_cost_tracker/parsers/registry.rb +16 -7
- data/lib/llm_cost_tracker/price_registry.rb +69 -0
- data/lib/llm_cost_tracker/prices.json +51 -0
- data/lib/llm_cost_tracker/pricing.rb +76 -41
- data/lib/llm_cost_tracker/railtie.rb +3 -0
- data/lib/llm_cost_tracker/storage/active_record_store.rb +24 -3
- data/lib/llm_cost_tracker/tracker.rb +65 -33
- data/lib/llm_cost_tracker/unknown_pricing.rb +47 -0
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +33 -5
- data/llm_cost_tracker.gemspec +9 -7
- metadata +38 -23
|
@@ -1,16 +1,19 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "json"
|
|
4
|
+
require "uri"
|
|
5
|
+
|
|
6
|
+
require_relative "base"
|
|
4
7
|
|
|
5
8
|
module LlmCostTracker
|
|
6
9
|
module Parsers
|
|
7
10
|
class Openai < Base
|
|
8
11
|
HOSTS = %w[api.openai.com].freeze
|
|
9
|
-
TRACKED_PATHS = %w[/v1/chat/completions /v1/completions /v1/embeddings].freeze
|
|
12
|
+
TRACKED_PATHS = %w[/v1/chat/completions /v1/completions /v1/embeddings /v1/responses].freeze
|
|
10
13
|
|
|
11
14
|
def match?(url)
|
|
12
15
|
uri = URI.parse(url.to_s)
|
|
13
|
-
HOSTS.include?(uri.host) && TRACKED_PATHS.
|
|
16
|
+
HOSTS.include?(uri.host.to_s.downcase) && TRACKED_PATHS.include?(uri.path)
|
|
14
17
|
rescue URI::InvalidURIError
|
|
15
18
|
false
|
|
16
19
|
end
|
|
@@ -25,12 +28,24 @@ module LlmCostTracker
|
|
|
25
28
|
request = safe_json_parse(request_body)
|
|
26
29
|
|
|
27
30
|
{
|
|
28
|
-
provider:
|
|
31
|
+
provider: provider_for(request_url),
|
|
29
32
|
model: response["model"] || request["model"],
|
|
30
|
-
input_tokens: usage["prompt_tokens"] || 0,
|
|
31
|
-
output_tokens: usage["completion_tokens"] || 0,
|
|
32
|
-
total_tokens: usage["total_tokens"] || 0
|
|
33
|
-
|
|
33
|
+
input_tokens: usage["prompt_tokens"] || usage["input_tokens"] || 0,
|
|
34
|
+
output_tokens: usage["completion_tokens"] || usage["output_tokens"] || 0,
|
|
35
|
+
total_tokens: usage["total_tokens"] || 0,
|
|
36
|
+
cached_input_tokens: cached_input_tokens(usage)
|
|
37
|
+
}.compact
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
private
|
|
41
|
+
|
|
42
|
+
def provider_for(_request_url)
|
|
43
|
+
"openai"
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def cached_input_tokens(usage)
|
|
47
|
+
details = usage["prompt_tokens_details"] || usage["input_tokens_details"] || {}
|
|
48
|
+
details["cached_tokens"]
|
|
34
49
|
end
|
|
35
50
|
end
|
|
36
51
|
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "openai"
|
|
4
|
+
|
|
5
|
+
module LlmCostTracker
|
|
6
|
+
module Parsers
|
|
7
|
+
class OpenaiCompatible < Openai
|
|
8
|
+
TRACKED_PATH_SUFFIXES = %w[/chat/completions /completions /embeddings /responses].freeze
|
|
9
|
+
|
|
10
|
+
def match?(url)
|
|
11
|
+
uri = URI.parse(url.to_s)
|
|
12
|
+
!provider_for_host(uri.host).nil? && tracked_path?(uri.path)
|
|
13
|
+
rescue URI::InvalidURIError
|
|
14
|
+
false
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
private
|
|
18
|
+
|
|
19
|
+
def provider_for(request_url)
|
|
20
|
+
uri = URI.parse(request_url.to_s)
|
|
21
|
+
provider_for_host(uri.host) || "openai_compatible"
|
|
22
|
+
rescue URI::InvalidURIError
|
|
23
|
+
"openai_compatible"
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def provider_for_host(host)
|
|
27
|
+
host = host.to_s.downcase
|
|
28
|
+
provider_name = configured_providers[host] ||
|
|
29
|
+
configured_providers.find do |configured_host, _provider|
|
|
30
|
+
configured_host.to_s.downcase == host
|
|
31
|
+
end&.last
|
|
32
|
+
provider_name&.to_s
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def configured_providers
|
|
36
|
+
LlmCostTracker.configuration.openai_compatible_providers
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# Reports whether the request path ends with one of the tracked
# OpenAI-style endpoint suffixes (TRACKED_PATH_SUFFIXES).
#
# @param path [String, nil] path component of the request URI
# @return [Boolean] true when the path ends with a tracked suffix
def tracked_path?(path)
  # String#end_with? accepts several suffixes and already covers exact
  # equality; to_s keeps a nil path (opaque URIs) from raising.
  path.to_s.end_with?(*TRACKED_PATH_SUFFIXES)
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
@@ -4,16 +4,14 @@ module LlmCostTracker
|
|
|
4
4
|
module Parsers
|
|
5
5
|
class Registry
|
|
6
6
|
class << self
|
|
7
|
+
PARSERS_MUTEX = Mutex.new
|
|
8
|
+
|
|
7
9
|
def parsers
|
|
8
|
-
@parsers ||=
|
|
9
|
-
Openai.new,
|
|
10
|
-
Anthropic.new,
|
|
11
|
-
Gemini.new
|
|
12
|
-
]
|
|
10
|
+
@parsers || PARSERS_MUTEX.synchronize { @parsers ||= default_parsers }
|
|
13
11
|
end
|
|
14
12
|
|
|
15
13
|
# Registers a parser ahead of the existing ones by prepending it to the list.
#
# The list is initialised inline rather than through `parsers`: `parsers`
# takes PARSERS_MUTEX itself while @parsers is nil, and Mutex is not
# reentrant, so calling it with the lock already held raises ThreadError
# (recursive locking) on the first registration after load or `reset!`.
#
# @param parser [Object] parser instance to prepend
# @return [Array] the updated parser list
def register(parser)
  PARSERS_MUTEX.synchronize { (@parsers ||= default_parsers).unshift(parser) }
end
|
|
18
16
|
|
|
19
17
|
def find_for(url)
|
|
@@ -21,7 +19,18 @@ module LlmCostTracker
|
|
|
21
19
|
end
|
|
22
20
|
|
|
23
21
|
def reset!
|
|
24
|
-
@parsers = nil
|
|
22
|
+
PARSERS_MUTEX.synchronize { @parsers = nil }
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
private
|
|
26
|
+
|
|
27
|
+
def default_parsers
|
|
28
|
+
[
|
|
29
|
+
Openai.new,
|
|
30
|
+
OpenaiCompatible.new,
|
|
31
|
+
Anthropic.new,
|
|
32
|
+
Gemini.new
|
|
33
|
+
]
|
|
25
34
|
end
|
|
26
35
|
end
|
|
27
36
|
end
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "yaml"
|
|
5
|
+
|
|
6
|
+
module LlmCostTracker
|
|
7
|
+
module PriceRegistry
|
|
8
|
+
DEFAULT_PRICES_PATH = File.expand_path("prices.json", __dir__)
|
|
9
|
+
PRICE_KEYS = %w[input cached_input output cache_read_input cache_creation_input].freeze
|
|
10
|
+
NORMALIZE_PRICE_ENTRY = lambda do |price|
|
|
11
|
+
(price || {}).each_with_object({}) do |(key, value), normalized|
|
|
12
|
+
key = key.to_s
|
|
13
|
+
normalized[key.to_sym] = Float(value) if PRICE_KEYS.include?(key)
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
NORMALIZE_PRICE_TABLE = lambda do |table|
|
|
17
|
+
(table || {}).each_with_object({}) do |(model, price), normalized|
|
|
18
|
+
normalized[model.to_s] = NORMALIZE_PRICE_ENTRY.call(price)
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
RAW_REGISTRY = JSON.parse(File.read(DEFAULT_PRICES_PATH)).freeze
|
|
22
|
+
PRICE_METADATA = RAW_REGISTRY.fetch("metadata", {}).freeze
|
|
23
|
+
BUILTIN_PRICES = NORMALIZE_PRICE_TABLE.call(RAW_REGISTRY.fetch("models", {})).freeze
|
|
24
|
+
|
|
25
|
+
class << self
|
|
26
|
+
def builtin_prices
|
|
27
|
+
BUILTIN_PRICES
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def metadata
|
|
31
|
+
PRICE_METADATA
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def normalize_price_table(table)
|
|
35
|
+
NORMALIZE_PRICE_TABLE.call(table)
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Loads and normalizes the price table stored in the given prices file.
#
# The result is memoized per [path, mtime]. The cache key is recorded only
# after the file has been read and normalized successfully; otherwise a
# failed load would leave the new key paired with the previous (or nil)
# table, and the next call with an unchanged file would return it instead
# of raising again.
#
# @param path [String, #to_s, nil] location of a JSON or YAML prices file
# @return [Hash] normalized price table; empty when no path is given
# @raise [Error] when the file is missing or cannot be parsed/normalized
def file_prices(path)
  return {} unless path

  path = path.to_s
  cache_key = [path, File.mtime(path).to_f]
  return @file_prices if @file_prices_cache_key == cache_key

  prices = normalize_price_table(price_file_models(load_price_file(path)))
  @file_prices_cache_key = cache_key
  @file_prices = prices
rescue Errno::ENOENT, JSON::ParserError, Psych::Exception, ArgumentError, TypeError => e
  raise Error, "Unable to load prices_file #{path.inspect}: #{e.message}"
end
|
|
50
|
+
|
|
51
|
+
private
|
|
52
|
+
|
|
53
|
+
def load_price_file(path)
|
|
54
|
+
contents = File.read(path)
|
|
55
|
+
return YAML.safe_load(contents, aliases: false) || {} if yaml_file?(path)
|
|
56
|
+
|
|
57
|
+
JSON.parse(contents)
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def yaml_file?(path)
|
|
61
|
+
%w[.yaml .yml].include?(File.extname(path).downcase)
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def price_file_models(registry)
|
|
65
|
+
registry.fetch("models", registry)
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
{
|
|
2
|
+
"metadata": {
|
|
3
|
+
"updated_at": "2026-04-18",
|
|
4
|
+
"currency": "USD",
|
|
5
|
+
"unit": "1M tokens",
|
|
6
|
+
"source_urls": [
|
|
7
|
+
"https://openai.com/api/pricing",
|
|
8
|
+
"https://www.anthropic.com/pricing",
|
|
9
|
+
"https://ai.google.dev/gemini-api/docs/pricing"
|
|
10
|
+
]
|
|
11
|
+
},
|
|
12
|
+
"models": {
|
|
13
|
+
"gpt-5.2": { "input": 1.75, "cached_input": 0.175, "output": 14.0 },
|
|
14
|
+
"gpt-5.1": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
|
|
15
|
+
"gpt-5": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
|
|
16
|
+
"gpt-5-mini": { "input": 0.25, "cached_input": 0.025, "output": 2.0 },
|
|
17
|
+
"gpt-5-nano": { "input": 0.05, "cached_input": 0.005, "output": 0.4 },
|
|
18
|
+
"gpt-4.1": { "input": 2.0, "cached_input": 0.5, "output": 8.0 },
|
|
19
|
+
"gpt-4.1-mini": { "input": 0.4, "cached_input": 0.1, "output": 1.6 },
|
|
20
|
+
"gpt-4.1-nano": { "input": 0.1, "cached_input": 0.025, "output": 0.4 },
|
|
21
|
+
"gpt-4o-2024-05-13": { "input": 5.0, "output": 15.0 },
|
|
22
|
+
"gpt-4o": { "input": 2.5, "cached_input": 1.25, "output": 10.0 },
|
|
23
|
+
"gpt-4o-mini": { "input": 0.15, "cached_input": 0.075, "output": 0.6 },
|
|
24
|
+
"gpt-4-turbo": { "input": 10.0, "output": 30.0 },
|
|
25
|
+
"gpt-4": { "input": 30.0, "output": 60.0 },
|
|
26
|
+
"gpt-3.5-turbo": { "input": 0.5, "output": 1.5 },
|
|
27
|
+
"o1": { "input": 15.0, "cached_input": 7.5, "output": 60.0 },
|
|
28
|
+
"o1-mini": { "input": 1.1, "cached_input": 0.55, "output": 4.4 },
|
|
29
|
+
"o3": { "input": 2.0, "cached_input": 0.5, "output": 8.0 },
|
|
30
|
+
"o3-mini": { "input": 1.1, "cached_input": 0.55, "output": 4.4 },
|
|
31
|
+
"o4-mini": { "input": 1.1, "cached_input": 0.275, "output": 4.4 },
|
|
32
|
+
"claude-sonnet-4-6": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
|
|
33
|
+
"claude-opus-4-6": { "input": 5.0, "output": 25.0, "cache_read_input": 0.5, "cache_creation_input": 6.25 },
|
|
34
|
+
"claude-opus-4-1": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
|
|
35
|
+
"claude-opus-4": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
|
|
36
|
+
"claude-sonnet-4-5": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
|
|
37
|
+
"claude-sonnet-4": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
|
|
38
|
+
"claude-haiku-4-5": { "input": 1.0, "output": 5.0, "cache_read_input": 0.1, "cache_creation_input": 1.25 },
|
|
39
|
+
"claude-3-7-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
|
|
40
|
+
"claude-3-5-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
|
|
41
|
+
"claude-3-5-haiku": { "input": 0.8, "output": 4.0, "cache_read_input": 0.08, "cache_creation_input": 1.0 },
|
|
42
|
+
"claude-3-opus": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
|
|
43
|
+
"gemini-2.5-pro": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
|
|
44
|
+
"gemini-2.5-flash": { "input": 0.3, "cached_input": 0.03, "output": 2.5 },
|
|
45
|
+
"gemini-2.5-flash-lite": { "input": 0.1, "cached_input": 0.01, "output": 0.4 },
|
|
46
|
+
"gemini-2.0-flash": { "input": 0.1, "cached_input": 0.025, "output": 0.4 },
|
|
47
|
+
"gemini-2.0-flash-lite": { "input": 0.075, "output": 0.3 },
|
|
48
|
+
"gemini-1.5-pro": { "input": 1.25, "output": 5.0 },
|
|
49
|
+
"gemini-1.5-flash": { "input": 0.075, "output": 0.3 }
|
|
50
|
+
}
|
|
51
|
+
}
|
|
@@ -2,72 +2,107 @@
|
|
|
2
2
|
|
|
3
3
|
module LlmCostTracker
|
|
4
4
|
# Prices per 1M tokens in USD.
|
|
5
|
-
# Updated: April 2026. Override via configuration.
|
|
6
5
|
module Pricing
|
|
7
|
-
PRICES =
|
|
8
|
-
# OpenAI
|
|
9
|
-
"gpt-4o" => { input: 2.50, output: 10.00 },
|
|
10
|
-
"gpt-4o-mini" => { input: 0.15, output: 0.60 },
|
|
11
|
-
"gpt-4-turbo" => { input: 10.00, output: 30.00 },
|
|
12
|
-
"gpt-4" => { input: 30.00, output: 60.00 },
|
|
13
|
-
"gpt-3.5-turbo" => { input: 0.50, output: 1.50 },
|
|
14
|
-
"o1" => { input: 15.00, output: 60.00 },
|
|
15
|
-
"o1-mini" => { input: 3.00, output: 12.00 },
|
|
16
|
-
"o3-mini" => { input: 1.10, output: 4.40 },
|
|
17
|
-
|
|
18
|
-
# Anthropic
|
|
19
|
-
"claude-sonnet-4-6" => { input: 3.00, output: 15.00 },
|
|
20
|
-
"claude-opus-4-6" => { input: 15.00, output: 75.00 },
|
|
21
|
-
"claude-haiku-4-5" => { input: 0.80, output: 4.00 },
|
|
22
|
-
"claude-3-5-sonnet-20241022" => { input: 3.00, output: 15.00 },
|
|
23
|
-
"claude-3-5-haiku-20241022" => { input: 0.80, output: 4.00 },
|
|
24
|
-
"claude-3-opus-20240229" => { input: 15.00, output: 75.00 },
|
|
25
|
-
|
|
26
|
-
# Google Gemini
|
|
27
|
-
"gemini-2.5-pro" => { input: 1.25, output: 10.00 },
|
|
28
|
-
"gemini-2.5-flash" => { input: 0.15, output: 0.60 },
|
|
29
|
-
"gemini-2.0-flash" => { input: 0.10, output: 0.40 },
|
|
30
|
-
"gemini-1.5-pro" => { input: 1.25, output: 5.00 },
|
|
31
|
-
"gemini-1.5-flash" => { input: 0.075, output: 0.30 },
|
|
32
|
-
}.freeze
|
|
6
|
+
PRICES = PriceRegistry.builtin_prices
|
|
33
7
|
|
|
34
8
|
class << self
|
|
35
|
-
def cost_for(model:, input_tokens:, output_tokens:
|
|
9
|
+
def cost_for(model:, input_tokens:, output_tokens:, cached_input_tokens: 0,
|
|
10
|
+
cache_read_input_tokens: 0, cache_creation_input_tokens: 0)
|
|
36
11
|
prices = lookup(model)
|
|
37
12
|
return nil unless prices
|
|
38
13
|
|
|
39
|
-
|
|
40
|
-
|
|
14
|
+
token_counts = normalized_token_counts(input_tokens, output_tokens, cached_input_tokens,
|
|
15
|
+
cache_read_input_tokens, cache_creation_input_tokens)
|
|
16
|
+
costs = calculate_costs(token_counts, prices)
|
|
41
17
|
|
|
42
18
|
{
|
|
43
|
-
input_cost:
|
|
44
|
-
|
|
45
|
-
|
|
19
|
+
input_cost: costs[:input].round(8),
|
|
20
|
+
cached_input_cost: costs[:cached_input].round(8),
|
|
21
|
+
cache_read_input_cost: costs[:cache_read_input].round(8),
|
|
22
|
+
cache_creation_input_cost: costs[:cache_creation_input].round(8),
|
|
23
|
+
output_cost: costs[:output].round(8),
|
|
24
|
+
total_cost: costs.values.sum.round(8),
|
|
46
25
|
currency: "USD"
|
|
47
26
|
}
|
|
48
27
|
end
|
|
49
28
|
|
|
50
29
|
def lookup(model)
|
|
51
|
-
|
|
52
|
-
|
|
30
|
+
table = prices
|
|
31
|
+
model_name = model.to_s
|
|
32
|
+
normalized_model = normalize_model_name(model_name)
|
|
33
|
+
|
|
34
|
+
table[model_name] || table[normalized_model] || fuzzy_match(model_name, normalized_model, table)
|
|
53
35
|
end
|
|
54
36
|
|
|
55
37
|
def models
|
|
56
|
-
|
|
38
|
+
prices.keys
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def metadata
|
|
42
|
+
PriceRegistry.metadata
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def prices
|
|
46
|
+
PRICES
|
|
47
|
+
.merge(PriceRegistry.file_prices(LlmCostTracker.configuration.prices_file))
|
|
48
|
+
.merge(PriceRegistry.normalize_price_table(LlmCostTracker.configuration.pricing_overrides))
|
|
57
49
|
end
|
|
58
50
|
|
|
59
51
|
private
|
|
60
52
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
53
|
+
def normalized_token_counts(input_tokens, output_tokens, cached_input_tokens,
|
|
54
|
+
cache_read_input_tokens, cache_creation_input_tokens)
|
|
55
|
+
cached_input_tokens = cached_input_tokens.to_i
|
|
56
|
+
|
|
57
|
+
{
|
|
58
|
+
input: [input_tokens.to_i - cached_input_tokens, 0].max,
|
|
59
|
+
cached_input: cached_input_tokens,
|
|
60
|
+
cache_read_input: cache_read_input_tokens.to_i,
|
|
61
|
+
cache_creation_input: cache_creation_input_tokens.to_i,
|
|
62
|
+
output: output_tokens.to_i
|
|
63
|
+
}
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def calculate_costs(token_counts, prices)
|
|
67
|
+
{
|
|
68
|
+
input: token_cost(token_counts[:input], prices[:input]),
|
|
69
|
+
cached_input: token_cost(token_counts[:cached_input], prices[:cached_input] || prices[:input]),
|
|
70
|
+
cache_read_input: token_cost(
|
|
71
|
+
token_counts[:cache_read_input],
|
|
72
|
+
prices[:cache_read_input] || prices[:cached_input] || prices[:input]
|
|
73
|
+
),
|
|
74
|
+
cache_creation_input: token_cost(
|
|
75
|
+
token_counts[:cache_creation_input],
|
|
76
|
+
prices[:cache_creation_input] || prices[:input]
|
|
77
|
+
),
|
|
78
|
+
output: token_cost(token_counts[:output], prices[:output])
|
|
79
|
+
}
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
# Converts a token count into a cost using a price quoted per 1M tokens.
#
# A missing price is treated as 0.0 so a price entry that lacks a key
# (for example an input-only entry with no :output) yields a zero cost
# component instead of a TypeError from multiplying by nil.
#
# @param tokens [Numeric, #to_f, nil] number of tokens
# @param per_million_price [Numeric, nil] price per 1M tokens
# @return [Float] cost for the given token count
def token_cost(tokens, per_million_price)
  (tokens.to_f / 1_000_000) * (per_million_price || 0.0)
end
|
|
64
85
|
|
|
65
|
-
|
|
66
|
-
|
|
86
|
+
def normalize_model_name(model)
|
|
87
|
+
model.to_s.split("/").last
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# Try to match model names like "gpt-4o-2024-08-06" to "gpt-4o".
|
|
91
|
+
def fuzzy_match(model, normalized_model, table)
|
|
92
|
+
sorted_price_keys(table).each do |key|
|
|
93
|
+
return table[key] if model.start_with?(key) || normalized_model.start_with?(key)
|
|
67
94
|
end
|
|
68
95
|
|
|
69
96
|
nil
|
|
70
97
|
end
|
|
98
|
+
|
|
99
|
+
def sorted_price_keys(table)
|
|
100
|
+
cache_key = table.keys
|
|
101
|
+
return @sorted_price_keys if @sorted_price_keys_cache_key == cache_key
|
|
102
|
+
|
|
103
|
+
@sorted_price_keys_cache_key = cache_key
|
|
104
|
+
@sorted_price_keys = cache_key.sort_by { |key| -key.length }
|
|
105
|
+
end
|
|
71
106
|
end
|
|
72
107
|
end
|
|
73
108
|
end
|
|
@@ -3,7 +3,10 @@
|
|
|
3
3
|
module LlmCostTracker
|
|
4
4
|
class Railtie < Rails::Railtie
|
|
5
5
|
generators do
|
|
6
|
+
require_relative "generators/llm_cost_tracker/add_latency_ms_generator"
|
|
6
7
|
require_relative "generators/llm_cost_tracker/install_generator"
|
|
8
|
+
require_relative "generators/llm_cost_tracker/upgrade_cost_precision_generator"
|
|
9
|
+
require_relative "generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator"
|
|
7
10
|
end
|
|
8
11
|
|
|
9
12
|
initializer "llm_cost_tracker.configure" do
|
|
@@ -5,7 +5,9 @@ module LlmCostTracker
|
|
|
5
5
|
class ActiveRecordStore
|
|
6
6
|
class << self
|
|
7
7
|
def save(event)
|
|
8
|
-
|
|
8
|
+
tags = stringify_tags(event[:tags] || {})
|
|
9
|
+
|
|
10
|
+
attributes = {
|
|
9
11
|
provider: event[:provider],
|
|
10
12
|
model: event[:model],
|
|
11
13
|
input_tokens: event[:input_tokens],
|
|
@@ -14,9 +16,12 @@ module LlmCostTracker
|
|
|
14
16
|
input_cost: event.dig(:cost, :input_cost),
|
|
15
17
|
output_cost: event.dig(:cost, :output_cost),
|
|
16
18
|
total_cost: event.dig(:cost, :total_cost),
|
|
17
|
-
tags:
|
|
19
|
+
tags: tags_for_storage(tags),
|
|
18
20
|
tracked_at: event[:tracked_at]
|
|
19
|
-
|
|
21
|
+
}
|
|
22
|
+
attributes[:latency_ms] = event[:latency_ms] if model_class.latency_column?
|
|
23
|
+
|
|
24
|
+
model_class.create!(attributes)
|
|
20
25
|
end
|
|
21
26
|
|
|
22
27
|
def monthly_total(time: Time.now.utc)
|
|
@@ -31,6 +36,22 @@ module LlmCostTracker
|
|
|
31
36
|
def model_class
|
|
32
37
|
LlmCostTracker::LlmApiCall
|
|
33
38
|
end
|
|
39
|
+
|
|
40
|
+
private
|
|
41
|
+
|
|
42
|
+
def stringify_tags(tags)
|
|
43
|
+
tags.transform_keys(&:to_s).transform_values { |value| stringify_tag_value(value) }
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def tags_for_storage(tags)
|
|
47
|
+
model_class.tags_json_column? ? tags : tags.to_json
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def stringify_tag_value(value)
|
|
51
|
+
return value.transform_values { |nested| stringify_tag_value(nested) } if value.is_a?(Hash)
|
|
52
|
+
|
|
53
|
+
value.to_s
|
|
54
|
+
end
|
|
34
55
|
end
|
|
35
56
|
end
|
|
36
57
|
end
|
|
@@ -5,21 +5,33 @@ module LlmCostTracker
|
|
|
5
5
|
EVENT_NAME = "llm_request.llm_cost_tracker"
|
|
6
6
|
|
|
7
7
|
class << self
|
|
8
|
-
def
|
|
8
|
+
def enforce_budget!
|
|
9
|
+
Budget.enforce!
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def record(provider:, model:, input_tokens:, output_tokens:, metadata: {}, latency_ms: nil)
|
|
13
|
+
usage = EventMetadata.usage_data(input_tokens, output_tokens, metadata)
|
|
14
|
+
|
|
9
15
|
cost_data = Pricing.cost_for(
|
|
10
16
|
model: model,
|
|
11
|
-
input_tokens: input_tokens,
|
|
12
|
-
output_tokens: output_tokens
|
|
17
|
+
input_tokens: usage[:input_tokens],
|
|
18
|
+
output_tokens: usage[:output_tokens],
|
|
19
|
+
cached_input_tokens: usage[:cached_input_tokens],
|
|
20
|
+
cache_read_input_tokens: usage[:cache_read_input_tokens],
|
|
21
|
+
cache_creation_input_tokens: usage[:cache_creation_input_tokens]
|
|
13
22
|
)
|
|
14
23
|
|
|
24
|
+
UnknownPricing.handle!(model) unless cost_data
|
|
25
|
+
|
|
15
26
|
event = {
|
|
16
27
|
provider: provider,
|
|
17
28
|
model: model,
|
|
18
|
-
input_tokens: input_tokens,
|
|
19
|
-
output_tokens: output_tokens,
|
|
20
|
-
total_tokens:
|
|
29
|
+
input_tokens: usage[:input_tokens],
|
|
30
|
+
output_tokens: usage[:output_tokens],
|
|
31
|
+
total_tokens: usage[:total_tokens],
|
|
21
32
|
cost: cost_data,
|
|
22
|
-
tags: LlmCostTracker.configuration.default_tags.merge(metadata),
|
|
33
|
+
tags: LlmCostTracker.configuration.default_tags.merge(EventMetadata.tags(metadata)),
|
|
34
|
+
latency_ms: normalized_latency_ms(latency_ms),
|
|
23
35
|
tracked_at: Time.now.utc
|
|
24
36
|
}
|
|
25
37
|
|
|
@@ -27,10 +39,8 @@ module LlmCostTracker
|
|
|
27
39
|
ActiveSupport::Notifications.instrument(EVENT_NAME, event)
|
|
28
40
|
|
|
29
41
|
# Store based on backend
|
|
30
|
-
store(event)
|
|
31
|
-
|
|
32
|
-
# Budget check
|
|
33
|
-
check_budget(event)
|
|
42
|
+
stored = store(event)
|
|
43
|
+
Budget.check!(event) unless stored == false
|
|
34
44
|
|
|
35
45
|
event
|
|
36
46
|
end
|
|
@@ -48,14 +58,22 @@ module LlmCostTracker
|
|
|
48
58
|
when :custom
|
|
49
59
|
config.custom_storage&.call(event)
|
|
50
60
|
end
|
|
61
|
+
|
|
62
|
+
true
|
|
63
|
+
rescue BudgetExceededError, UnknownPricingError
|
|
64
|
+
raise
|
|
65
|
+
rescue StandardError => e
|
|
66
|
+
handle_storage_error(e)
|
|
67
|
+
false
|
|
51
68
|
end
|
|
52
69
|
|
|
53
70
|
def log_event(event)
|
|
54
|
-
cost_str = event[:cost] ? "$#{'%.6f'
|
|
71
|
+
cost_str = event[:cost] ? "$#{format('%.6f', event[:cost][:total_cost])}" : "unknown"
|
|
55
72
|
|
|
56
73
|
message = "[LlmCostTracker] #{event[:provider]}/#{event[:model]} " \
|
|
57
74
|
"tokens=#{event[:input_tokens]}+#{event[:output_tokens]} " \
|
|
58
75
|
"cost=#{cost_str}"
|
|
76
|
+
message += " latency=#{event[:latency_ms]}ms" if event[:latency_ms]
|
|
59
77
|
message += " tags=#{event[:tags]}" unless event[:tags].empty?
|
|
60
78
|
|
|
61
79
|
case LlmCostTracker.configuration.log_level
|
|
@@ -71,36 +89,50 @@ module LlmCostTracker
|
|
|
71
89
|
warn(message) unless defined?(Rails)
|
|
72
90
|
end
|
|
73
91
|
|
|
92
|
+
def log_warning(message)
|
|
93
|
+
message = "[LlmCostTracker] #{message}"
|
|
94
|
+
|
|
95
|
+
if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
|
|
96
|
+
Rails.logger.warn(message)
|
|
97
|
+
else
|
|
98
|
+
warn message
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
|
|
74
102
|
def store_active_record(event)
|
|
75
|
-
|
|
103
|
+
require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
|
|
104
|
+
require_relative "storage/active_record_store" unless defined?(LlmCostTracker::Storage::ActiveRecordStore)
|
|
76
105
|
|
|
77
106
|
LlmCostTracker::Storage::ActiveRecordStore.save(event)
|
|
107
|
+
rescue LoadError => e
|
|
108
|
+
raise Error, "ActiveRecord storage requires the active_record gem: #{e.message}"
|
|
78
109
|
end
|
|
79
110
|
|
|
80
|
-
def
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
111
|
+
def handle_storage_error(error)
|
|
112
|
+
case storage_error_behavior
|
|
113
|
+
when :ignore
|
|
114
|
+
nil
|
|
115
|
+
when :warn
|
|
116
|
+
log_warning("Storage failed; tracking event was not persisted: #{error.class}: #{error.message}")
|
|
117
|
+
when :raise
|
|
118
|
+
storage_error = StorageError.new(error)
|
|
119
|
+
raise storage_error
|
|
120
|
+
end
|
|
121
|
+
end
|
|
84
122
|
|
|
85
|
-
|
|
86
|
-
|
|
123
|
+
def storage_error_behavior
|
|
124
|
+
behavior = (LlmCostTracker.configuration.storage_error_behavior || :warn).to_sym
|
|
125
|
+
return behavior if Configuration::STORAGE_ERROR_BEHAVIORS.include?(behavior)
|
|
87
126
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
last_event: event
|
|
92
|
-
)
|
|
127
|
+
raise Error,
|
|
128
|
+
"Unknown storage_error_behavior: #{behavior.inspect}. " \
|
|
129
|
+
"Use one of: #{Configuration::STORAGE_ERROR_BEHAVIORS.join(', ')}"
|
|
93
130
|
end
|
|
94
131
|
|
|
95
|
-
def
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
LlmCostTracker::Storage::ActiveRecordStore.monthly_total + latest_cost
|
|
100
|
-
else
|
|
101
|
-
# For other backends, we can only report the latest cost
|
|
102
|
-
latest_cost
|
|
103
|
-
end
|
|
132
|
+
def normalized_latency_ms(latency_ms)
|
|
133
|
+
return nil if latency_ms.nil?
|
|
134
|
+
|
|
135
|
+
[latency_ms.to_i, 0].max
|
|
104
136
|
end
|
|
105
137
|
end
|
|
106
138
|
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
|
|
4
|
+
class UnknownPricing
|
|
5
|
+
class << self
|
|
6
|
+
def handle!(model)
|
|
7
|
+
model = normalized_model_name(model)
|
|
8
|
+
|
|
9
|
+
case behavior
|
|
10
|
+
when :ignore
|
|
11
|
+
nil
|
|
12
|
+
when :warn
|
|
13
|
+
warn_missing(model)
|
|
14
|
+
when :raise
|
|
15
|
+
raise UnknownPricingError.new(model: model)
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
private
|
|
20
|
+
|
|
21
|
+
def normalized_model_name(model)
|
|
22
|
+
model.to_s.empty? ? "unknown" : model.to_s
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def warn_missing(model)
|
|
26
|
+
message = "[LlmCostTracker] No pricing configured for model #{model.inspect}. " \
|
|
27
|
+
"Cost and budget enforcement will be skipped for this event. " \
|
|
28
|
+
"Add a pricing_overrides entry or set unknown_pricing_behavior."
|
|
29
|
+
|
|
30
|
+
if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
|
|
31
|
+
Rails.logger.warn(message)
|
|
32
|
+
else
|
|
33
|
+
Kernel.warn(message)
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def behavior
|
|
38
|
+
behavior = (LlmCostTracker.configuration.unknown_pricing_behavior || :warn).to_sym
|
|
39
|
+
return behavior if Configuration::UNKNOWN_PRICING_BEHAVIORS.include?(behavior)
|
|
40
|
+
|
|
41
|
+
raise Error,
|
|
42
|
+
"Unknown unknown_pricing_behavior: #{behavior.inspect}. " \
|
|
43
|
+
"Use one of: #{Configuration::UNKNOWN_PRICING_BEHAVIORS.join(', ')}"
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|