llm_cost_tracker 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +44 -0
  3. data/CHANGELOG.md +62 -0
  4. data/README.md +243 -26
  5. data/Rakefile +3 -1
  6. data/lib/llm_cost_tracker/budget.rb +97 -0
  7. data/lib/llm_cost_tracker/configuration.rb +37 -0
  8. data/lib/llm_cost_tracker/errors.rb +37 -0
  9. data/lib/llm_cost_tracker/event_metadata.rb +54 -0
  10. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +29 -0
  11. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +9 -0
  12. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +16 -4
  13. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +14 -1
  14. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +15 -0
  15. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +41 -0
  16. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +29 -0
  17. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +29 -0
  18. data/lib/llm_cost_tracker/llm_api_call.rb +69 -1
  19. data/lib/llm_cost_tracker/middleware/faraday.rb +51 -14
  20. data/lib/llm_cost_tracker/parsers/anthropic.rb +10 -5
  21. data/lib/llm_cost_tracker/parsers/gemini.rb +13 -5
  22. data/lib/llm_cost_tracker/parsers/openai.rb +22 -7
  23. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +44 -0
  24. data/lib/llm_cost_tracker/parsers/registry.rb +16 -7
  25. data/lib/llm_cost_tracker/price_registry.rb +69 -0
  26. data/lib/llm_cost_tracker/prices.json +51 -0
  27. data/lib/llm_cost_tracker/pricing.rb +76 -41
  28. data/lib/llm_cost_tracker/railtie.rb +3 -0
  29. data/lib/llm_cost_tracker/storage/active_record_store.rb +24 -3
  30. data/lib/llm_cost_tracker/tracker.rb +65 -33
  31. data/lib/llm_cost_tracker/unknown_pricing.rb +47 -0
  32. data/lib/llm_cost_tracker/version.rb +1 -1
  33. data/lib/llm_cost_tracker.rb +33 -5
  34. data/llm_cost_tracker.gemspec +9 -7
  35. metadata +38 -23
@@ -1,16 +1,19 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "json"
4
+ require "uri"
5
+
6
+ require_relative "base"
4
7
 
5
8
  module LlmCostTracker
6
9
  module Parsers
7
10
  class Openai < Base
8
11
  HOSTS = %w[api.openai.com].freeze
9
- TRACKED_PATHS = %w[/v1/chat/completions /v1/completions /v1/embeddings].freeze
12
+ TRACKED_PATHS = %w[/v1/chat/completions /v1/completions /v1/embeddings /v1/responses].freeze
10
13
 
11
14
  def match?(url)
12
15
  uri = URI.parse(url.to_s)
13
- HOSTS.include?(uri.host) && TRACKED_PATHS.any? { |p| uri.path.start_with?(p) }
16
+ HOSTS.include?(uri.host.to_s.downcase) && TRACKED_PATHS.include?(uri.path)
14
17
  rescue URI::InvalidURIError
15
18
  false
16
19
  end
@@ -25,12 +28,24 @@ module LlmCostTracker
25
28
  request = safe_json_parse(request_body)
26
29
 
27
30
  {
28
- provider: "openai",
31
+ provider: provider_for(request_url),
29
32
  model: response["model"] || request["model"],
30
- input_tokens: usage["prompt_tokens"] || 0,
31
- output_tokens: usage["completion_tokens"] || 0,
32
- total_tokens: usage["total_tokens"] || 0
33
- }
33
+ input_tokens: usage["prompt_tokens"] || usage["input_tokens"] || 0,
34
+ output_tokens: usage["completion_tokens"] || usage["output_tokens"] || 0,
35
+ total_tokens: usage["total_tokens"] || 0,
36
+ cached_input_tokens: cached_input_tokens(usage)
37
+ }.compact
38
+ end
39
+
40
+ private
41
+
42
+ def provider_for(_request_url)
43
+ "openai"
44
+ end
45
+
46
+ def cached_input_tokens(usage)
47
+ details = usage["prompt_tokens_details"] || usage["input_tokens_details"] || {}
48
+ details["cached_tokens"]
34
49
  end
35
50
  end
36
51
  end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "openai"
4
+
5
+ module LlmCostTracker
6
+ module Parsers
7
+ class OpenaiCompatible < Openai
8
+ TRACKED_PATH_SUFFIXES = %w[/chat/completions /completions /embeddings /responses].freeze
9
+
10
+ def match?(url)
11
+ uri = URI.parse(url.to_s)
12
+ !provider_for_host(uri.host).nil? && tracked_path?(uri.path)
13
+ rescue URI::InvalidURIError
14
+ false
15
+ end
16
+
17
+ private
18
+
19
+ def provider_for(request_url)
20
+ uri = URI.parse(request_url.to_s)
21
+ provider_for_host(uri.host) || "openai_compatible"
22
+ rescue URI::InvalidURIError
23
+ "openai_compatible"
24
+ end
25
+
26
+ def provider_for_host(host)
27
+ host = host.to_s.downcase
28
+ provider_name = configured_providers[host] ||
29
+ configured_providers.find do |configured_host, _provider|
30
+ configured_host.to_s.downcase == host
31
+ end&.last
32
+ provider_name&.to_s
33
+ end
34
+
35
+ def configured_providers
36
+ LlmCostTracker.configuration.openai_compatible_providers
37
+ end
38
+
39
# Reports whether +path+ ends with one of TRACKED_PATH_SUFFIXES.
#
# An exact match is covered by the suffix check, so no separate equality
# test is needed. A nil path (URI#path is nil for opaque URIs such as
# "mailto:") is coerced to "" and therefore never matches, instead of
# raising NoMethodError.
#
# @param path [String, nil] the path component of the request URL
# @return [Boolean]
def tracked_path?(path)
  path.to_s.end_with?(*TRACKED_PATH_SUFFIXES)
end
42
+ end
43
+ end
44
+ end
@@ -4,16 +4,14 @@ module LlmCostTracker
4
4
  module Parsers
5
5
  class Registry
6
6
  class << self
7
+ PARSERS_MUTEX = Mutex.new
8
+
7
9
  def parsers
8
- @parsers ||= [
9
- Openai.new,
10
- Anthropic.new,
11
- Gemini.new
12
- ]
10
+ @parsers || PARSERS_MUTEX.synchronize { @parsers ||= default_parsers }
13
11
  end
14
12
 
15
13
  def register(parser)
16
- parsers.unshift(parser)
14
+ PARSERS_MUTEX.synchronize { parsers.unshift(parser) }
17
15
  end
18
16
 
19
17
  def find_for(url)
@@ -21,7 +19,18 @@ module LlmCostTracker
21
19
  end
22
20
 
23
21
  def reset!
24
- @parsers = nil
22
+ PARSERS_MUTEX.synchronize { @parsers = nil }
23
+ end
24
+
25
+ private
26
+
27
+ def default_parsers
28
+ [
29
+ Openai.new,
30
+ OpenaiCompatible.new,
31
+ Anthropic.new,
32
+ Gemini.new
33
+ ]
25
34
  end
26
35
  end
27
36
  end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "yaml"
5
+
6
+ module LlmCostTracker
7
+ module PriceRegistry
8
+ DEFAULT_PRICES_PATH = File.expand_path("prices.json", __dir__)
9
+ PRICE_KEYS = %w[input cached_input output cache_read_input cache_creation_input].freeze
10
+ NORMALIZE_PRICE_ENTRY = lambda do |price|
11
+ (price || {}).each_with_object({}) do |(key, value), normalized|
12
+ key = key.to_s
13
+ normalized[key.to_sym] = Float(value) if PRICE_KEYS.include?(key)
14
+ end
15
+ end
16
+ NORMALIZE_PRICE_TABLE = lambda do |table|
17
+ (table || {}).each_with_object({}) do |(model, price), normalized|
18
+ normalized[model.to_s] = NORMALIZE_PRICE_ENTRY.call(price)
19
+ end
20
+ end
21
+ RAW_REGISTRY = JSON.parse(File.read(DEFAULT_PRICES_PATH)).freeze
22
+ PRICE_METADATA = RAW_REGISTRY.fetch("metadata", {}).freeze
23
+ BUILTIN_PRICES = NORMALIZE_PRICE_TABLE.call(RAW_REGISTRY.fetch("models", {})).freeze
24
+
25
+ class << self
26
+ def builtin_prices
27
+ BUILTIN_PRICES
28
+ end
29
+
30
+ def metadata
31
+ PRICE_METADATA
32
+ end
33
+
34
+ def normalize_price_table(table)
35
+ NORMALIZE_PRICE_TABLE.call(table)
36
+ end
37
+
38
# Loads and normalizes the price table stored at +path+, memoizing the
# result per (path, mtime) pair so an unchanged file is only read once.
#
# The cache key and the cached table are assigned together, and only after
# the file has loaded successfully. Recording the key first meant that a
# failed load poisoned the cache: the next call with the same mtime matched
# the key and returned the stale (or nil) table instead of raising again.
#
# @param path [String, #to_s, nil] location of a JSON or YAML price file
# @return [Hash] normalized price table; an empty Hash when +path+ is nil/false
# @raise [Error] when the file is missing or cannot be parsed/normalized
def file_prices(path)
  return {} unless path

  path = path.to_s
  cache_key = [path, File.mtime(path).to_f]
  return @file_prices if @file_prices_cache_key == cache_key

  # Load first, publish second.
  prices = normalize_price_table(price_file_models(load_price_file(path)))
  @file_prices_cache_key = cache_key
  @file_prices = prices
rescue Errno::ENOENT, JSON::ParserError, Psych::Exception, ArgumentError, TypeError => e
  raise Error, "Unable to load prices_file #{path.inspect}: #{e.message}"
end
50
+
51
+ private
52
+
53
+ def load_price_file(path)
54
+ contents = File.read(path)
55
+ return YAML.safe_load(contents, aliases: false) || {} if yaml_file?(path)
56
+
57
+ JSON.parse(contents)
58
+ end
59
+
60
+ def yaml_file?(path)
61
+ %w[.yaml .yml].include?(File.extname(path).downcase)
62
+ end
63
+
64
+ def price_file_models(registry)
65
+ registry.fetch("models", registry)
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,51 @@
1
+ {
2
+ "metadata": {
3
+ "updated_at": "2026-04-18",
4
+ "currency": "USD",
5
+ "unit": "1M tokens",
6
+ "source_urls": [
7
+ "https://openai.com/api/pricing",
8
+ "https://www.anthropic.com/pricing",
9
+ "https://ai.google.dev/gemini-api/docs/pricing"
10
+ ]
11
+ },
12
+ "models": {
13
+ "gpt-5.2": { "input": 1.75, "cached_input": 0.175, "output": 14.0 },
14
+ "gpt-5.1": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
15
+ "gpt-5": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
16
+ "gpt-5-mini": { "input": 0.25, "cached_input": 0.025, "output": 2.0 },
17
+ "gpt-5-nano": { "input": 0.05, "cached_input": 0.005, "output": 0.4 },
18
+ "gpt-4.1": { "input": 2.0, "cached_input": 0.5, "output": 8.0 },
19
+ "gpt-4.1-mini": { "input": 0.4, "cached_input": 0.1, "output": 1.6 },
20
+ "gpt-4.1-nano": { "input": 0.1, "cached_input": 0.025, "output": 0.4 },
21
+ "gpt-4o-2024-05-13": { "input": 5.0, "output": 15.0 },
22
+ "gpt-4o": { "input": 2.5, "cached_input": 1.25, "output": 10.0 },
23
+ "gpt-4o-mini": { "input": 0.15, "cached_input": 0.075, "output": 0.6 },
24
+ "gpt-4-turbo": { "input": 10.0, "output": 30.0 },
25
+ "gpt-4": { "input": 30.0, "output": 60.0 },
26
+ "gpt-3.5-turbo": { "input": 0.5, "output": 1.5 },
27
+ "o1": { "input": 15.0, "cached_input": 7.5, "output": 60.0 },
28
+ "o1-mini": { "input": 1.1, "cached_input": 0.55, "output": 4.4 },
29
+ "o3": { "input": 2.0, "cached_input": 0.5, "output": 8.0 },
30
+ "o3-mini": { "input": 1.1, "cached_input": 0.55, "output": 4.4 },
31
+ "o4-mini": { "input": 1.1, "cached_input": 0.275, "output": 4.4 },
32
+ "claude-sonnet-4-6": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
33
+ "claude-opus-4-6": { "input": 5.0, "output": 25.0, "cache_read_input": 0.5, "cache_creation_input": 6.25 },
34
+ "claude-opus-4-1": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
35
+ "claude-opus-4": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
36
+ "claude-sonnet-4-5": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
37
+ "claude-sonnet-4": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
38
+ "claude-haiku-4-5": { "input": 1.0, "output": 5.0, "cache_read_input": 0.1, "cache_creation_input": 1.25 },
39
+ "claude-3-7-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
40
+ "claude-3-5-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
41
+ "claude-3-5-haiku": { "input": 0.8, "output": 4.0, "cache_read_input": 0.08, "cache_creation_input": 1.0 },
42
+ "claude-3-opus": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
43
+ "gemini-2.5-pro": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
44
+ "gemini-2.5-flash": { "input": 0.3, "cached_input": 0.03, "output": 2.5 },
45
+ "gemini-2.5-flash-lite": { "input": 0.1, "cached_input": 0.01, "output": 0.4 },
46
+ "gemini-2.0-flash": { "input": 0.1, "cached_input": 0.025, "output": 0.4 },
47
+ "gemini-2.0-flash-lite": { "input": 0.075, "output": 0.3 },
48
+ "gemini-1.5-pro": { "input": 1.25, "output": 5.0 },
49
+ "gemini-1.5-flash": { "input": 0.075, "output": 0.3 }
50
+ }
51
+ }
@@ -2,72 +2,107 @@
2
2
 
3
3
  module LlmCostTracker
4
4
  # Prices per 1M tokens in USD.
5
- # Updated: April 2026. Override via configuration.
6
5
  module Pricing
7
- PRICES = {
8
- # OpenAI
9
- "gpt-4o" => { input: 2.50, output: 10.00 },
10
- "gpt-4o-mini" => { input: 0.15, output: 0.60 },
11
- "gpt-4-turbo" => { input: 10.00, output: 30.00 },
12
- "gpt-4" => { input: 30.00, output: 60.00 },
13
- "gpt-3.5-turbo" => { input: 0.50, output: 1.50 },
14
- "o1" => { input: 15.00, output: 60.00 },
15
- "o1-mini" => { input: 3.00, output: 12.00 },
16
- "o3-mini" => { input: 1.10, output: 4.40 },
17
-
18
- # Anthropic
19
- "claude-sonnet-4-6" => { input: 3.00, output: 15.00 },
20
- "claude-opus-4-6" => { input: 15.00, output: 75.00 },
21
- "claude-haiku-4-5" => { input: 0.80, output: 4.00 },
22
- "claude-3-5-sonnet-20241022" => { input: 3.00, output: 15.00 },
23
- "claude-3-5-haiku-20241022" => { input: 0.80, output: 4.00 },
24
- "claude-3-opus-20240229" => { input: 15.00, output: 75.00 },
25
-
26
- # Google Gemini
27
- "gemini-2.5-pro" => { input: 1.25, output: 10.00 },
28
- "gemini-2.5-flash" => { input: 0.15, output: 0.60 },
29
- "gemini-2.0-flash" => { input: 0.10, output: 0.40 },
30
- "gemini-1.5-pro" => { input: 1.25, output: 5.00 },
31
- "gemini-1.5-flash" => { input: 0.075, output: 0.30 },
32
- }.freeze
6
+ PRICES = PriceRegistry.builtin_prices
33
7
 
34
8
  class << self
35
- def cost_for(model:, input_tokens:, output_tokens:)
9
+ def cost_for(model:, input_tokens:, output_tokens:, cached_input_tokens: 0,
10
+ cache_read_input_tokens: 0, cache_creation_input_tokens: 0)
36
11
  prices = lookup(model)
37
12
  return nil unless prices
38
13
 
39
- input_cost = (input_tokens.to_f / 1_000_000) * prices[:input]
40
- output_cost = (output_tokens.to_f / 1_000_000) * prices[:output]
14
+ token_counts = normalized_token_counts(input_tokens, output_tokens, cached_input_tokens,
15
+ cache_read_input_tokens, cache_creation_input_tokens)
16
+ costs = calculate_costs(token_counts, prices)
41
17
 
42
18
  {
43
- input_cost: input_cost.round(8),
44
- output_cost: output_cost.round(8),
45
- total_cost: (input_cost + output_cost).round(8),
19
+ input_cost: costs[:input].round(8),
20
+ cached_input_cost: costs[:cached_input].round(8),
21
+ cache_read_input_cost: costs[:cache_read_input].round(8),
22
+ cache_creation_input_cost: costs[:cache_creation_input].round(8),
23
+ output_cost: costs[:output].round(8),
24
+ total_cost: costs.values.sum.round(8),
46
25
  currency: "USD"
47
26
  }
48
27
  end
49
28
 
50
29
  def lookup(model)
51
- overrides = LlmCostTracker.configuration.pricing_overrides
52
- overrides[model] || PRICES[model] || fuzzy_match(model)
30
+ table = prices
31
+ model_name = model.to_s
32
+ normalized_model = normalize_model_name(model_name)
33
+
34
+ table[model_name] || table[normalized_model] || fuzzy_match(model_name, normalized_model, table)
53
35
  end
54
36
 
55
37
  def models
56
- PRICES.keys | LlmCostTracker.configuration.pricing_overrides.keys
38
+ prices.keys
39
+ end
40
+
41
+ def metadata
42
+ PriceRegistry.metadata
43
+ end
44
+
45
+ def prices
46
+ PRICES
47
+ .merge(PriceRegistry.file_prices(LlmCostTracker.configuration.prices_file))
48
+ .merge(PriceRegistry.normalize_price_table(LlmCostTracker.configuration.pricing_overrides))
57
49
  end
58
50
 
59
51
  private
60
52
 
61
- # Try to match model names like "gpt-4o-2024-08-06" to "gpt-4o"
62
- def fuzzy_match(model)
63
- return nil unless model
53
+ def normalized_token_counts(input_tokens, output_tokens, cached_input_tokens,
54
+ cache_read_input_tokens, cache_creation_input_tokens)
55
+ cached_input_tokens = cached_input_tokens.to_i
56
+
57
+ {
58
+ input: [input_tokens.to_i - cached_input_tokens, 0].max,
59
+ cached_input: cached_input_tokens,
60
+ cache_read_input: cache_read_input_tokens.to_i,
61
+ cache_creation_input: cache_creation_input_tokens.to_i,
62
+ output: output_tokens.to_i
63
+ }
64
+ end
65
+
66
+ def calculate_costs(token_counts, prices)
67
+ {
68
+ input: token_cost(token_counts[:input], prices[:input]),
69
+ cached_input: token_cost(token_counts[:cached_input], prices[:cached_input] || prices[:input]),
70
+ cache_read_input: token_cost(
71
+ token_counts[:cache_read_input],
72
+ prices[:cache_read_input] || prices[:cached_input] || prices[:input]
73
+ ),
74
+ cache_creation_input: token_cost(
75
+ token_counts[:cache_creation_input],
76
+ prices[:cache_creation_input] || prices[:input]
77
+ ),
78
+ output: token_cost(token_counts[:output], prices[:output])
79
+ }
80
+ end
81
+
82
# Converts a token count into a cost using a per-1M-token price.
#
# A missing (nil) price is treated as 0.0. Without the coercion,
# +Float * nil+ raises TypeError even when the token count is zero.
#
# @param tokens [Numeric, nil] number of tokens billed at this rate
# @param per_million_price [Numeric, nil] price per 1,000,000 tokens
# @return [Float] cost for +tokens+ (0.0 when the price is missing)
def token_cost(tokens, per_million_price)
  (tokens.to_f / 1_000_000) * per_million_price.to_f
end
64
85
 
65
- PRICES.each do |key, value|
66
- return value if model.start_with?(key)
86
+ def normalize_model_name(model)
87
+ model.to_s.split("/").last
88
+ end
89
+
90
+ # Try to match model names like "gpt-4o-2024-08-06" to "gpt-4o".
91
+ def fuzzy_match(model, normalized_model, table)
92
+ sorted_price_keys(table).each do |key|
93
+ return table[key] if model.start_with?(key) || normalized_model.start_with?(key)
67
94
  end
68
95
 
69
96
  nil
70
97
  end
98
+
99
+ def sorted_price_keys(table)
100
+ cache_key = table.keys
101
+ return @sorted_price_keys if @sorted_price_keys_cache_key == cache_key
102
+
103
+ @sorted_price_keys_cache_key = cache_key
104
+ @sorted_price_keys = cache_key.sort_by { |key| -key.length }
105
+ end
71
106
  end
72
107
  end
73
108
  end
@@ -3,7 +3,10 @@
3
3
  module LlmCostTracker
4
4
  class Railtie < Rails::Railtie
5
5
  generators do
6
+ require_relative "generators/llm_cost_tracker/add_latency_ms_generator"
6
7
  require_relative "generators/llm_cost_tracker/install_generator"
8
+ require_relative "generators/llm_cost_tracker/upgrade_cost_precision_generator"
9
+ require_relative "generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator"
7
10
  end
8
11
 
9
12
  initializer "llm_cost_tracker.configure" do
@@ -5,7 +5,9 @@ module LlmCostTracker
5
5
  class ActiveRecordStore
6
6
  class << self
7
7
  def save(event)
8
- model_class.create!(
8
+ tags = stringify_tags(event[:tags] || {})
9
+
10
+ attributes = {
9
11
  provider: event[:provider],
10
12
  model: event[:model],
11
13
  input_tokens: event[:input_tokens],
@@ -14,9 +16,12 @@ module LlmCostTracker
14
16
  input_cost: event.dig(:cost, :input_cost),
15
17
  output_cost: event.dig(:cost, :output_cost),
16
18
  total_cost: event.dig(:cost, :total_cost),
17
- tags: event[:tags].to_json,
19
+ tags: tags_for_storage(tags),
18
20
  tracked_at: event[:tracked_at]
19
- )
21
+ }
22
+ attributes[:latency_ms] = event[:latency_ms] if model_class.latency_column?
23
+
24
+ model_class.create!(attributes)
20
25
  end
21
26
 
22
27
  def monthly_total(time: Time.now.utc)
@@ -31,6 +36,22 @@ module LlmCostTracker
31
36
  def model_class
32
37
  LlmCostTracker::LlmApiCall
33
38
  end
39
+
40
+ private
41
+
42
+ def stringify_tags(tags)
43
+ tags.transform_keys(&:to_s).transform_values { |value| stringify_tag_value(value) }
44
+ end
45
+
46
+ def tags_for_storage(tags)
47
+ model_class.tags_json_column? ? tags : tags.to_json
48
+ end
49
+
50
+ def stringify_tag_value(value)
51
+ return value.transform_values { |nested| stringify_tag_value(nested) } if value.is_a?(Hash)
52
+
53
+ value.to_s
54
+ end
34
55
  end
35
56
  end
36
57
  end
@@ -5,21 +5,33 @@ module LlmCostTracker
5
5
  EVENT_NAME = "llm_request.llm_cost_tracker"
6
6
 
7
7
  class << self
8
- def record(provider:, model:, input_tokens:, output_tokens:, metadata: {})
8
+ def enforce_budget!
9
+ Budget.enforce!
10
+ end
11
+
12
+ def record(provider:, model:, input_tokens:, output_tokens:, metadata: {}, latency_ms: nil)
13
+ usage = EventMetadata.usage_data(input_tokens, output_tokens, metadata)
14
+
9
15
  cost_data = Pricing.cost_for(
10
16
  model: model,
11
- input_tokens: input_tokens,
12
- output_tokens: output_tokens
17
+ input_tokens: usage[:input_tokens],
18
+ output_tokens: usage[:output_tokens],
19
+ cached_input_tokens: usage[:cached_input_tokens],
20
+ cache_read_input_tokens: usage[:cache_read_input_tokens],
21
+ cache_creation_input_tokens: usage[:cache_creation_input_tokens]
13
22
  )
14
23
 
24
+ UnknownPricing.handle!(model) unless cost_data
25
+
15
26
  event = {
16
27
  provider: provider,
17
28
  model: model,
18
- input_tokens: input_tokens,
19
- output_tokens: output_tokens,
20
- total_tokens: input_tokens + output_tokens,
29
+ input_tokens: usage[:input_tokens],
30
+ output_tokens: usage[:output_tokens],
31
+ total_tokens: usage[:total_tokens],
21
32
  cost: cost_data,
22
- tags: LlmCostTracker.configuration.default_tags.merge(metadata),
33
+ tags: LlmCostTracker.configuration.default_tags.merge(EventMetadata.tags(metadata)),
34
+ latency_ms: normalized_latency_ms(latency_ms),
23
35
  tracked_at: Time.now.utc
24
36
  }
25
37
 
@@ -27,10 +39,8 @@ module LlmCostTracker
27
39
  ActiveSupport::Notifications.instrument(EVENT_NAME, event)
28
40
 
29
41
  # Store based on backend
30
- store(event)
31
-
32
- # Budget check
33
- check_budget(event)
42
+ stored = store(event)
43
+ Budget.check!(event) unless stored == false
34
44
 
35
45
  event
36
46
  end
@@ -48,14 +58,22 @@ module LlmCostTracker
48
58
  when :custom
49
59
  config.custom_storage&.call(event)
50
60
  end
61
+
62
+ true
63
+ rescue BudgetExceededError, UnknownPricingError
64
+ raise
65
+ rescue StandardError => e
66
+ handle_storage_error(e)
67
+ false
51
68
  end
52
69
 
53
70
  def log_event(event)
54
- cost_str = event[:cost] ? "$#{'%.6f' % event[:cost][:total_cost]}" : "unknown"
71
+ cost_str = event[:cost] ? "$#{format('%.6f', event[:cost][:total_cost])}" : "unknown"
55
72
 
56
73
  message = "[LlmCostTracker] #{event[:provider]}/#{event[:model]} " \
57
74
  "tokens=#{event[:input_tokens]}+#{event[:output_tokens]} " \
58
75
  "cost=#{cost_str}"
76
+ message += " latency=#{event[:latency_ms]}ms" if event[:latency_ms]
59
77
  message += " tags=#{event[:tags]}" unless event[:tags].empty?
60
78
 
61
79
  case LlmCostTracker.configuration.log_level
@@ -71,36 +89,50 @@ module LlmCostTracker
71
89
  warn(message) unless defined?(Rails)
72
90
  end
73
91
 
92
+ def log_warning(message)
93
+ message = "[LlmCostTracker] #{message}"
94
+
95
+ if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
96
+ Rails.logger.warn(message)
97
+ else
98
+ warn message
99
+ end
100
+ end
101
+
74
102
  def store_active_record(event)
75
- return unless defined?(LlmCostTracker::Storage::ActiveRecordStore)
103
+ require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
104
+ require_relative "storage/active_record_store" unless defined?(LlmCostTracker::Storage::ActiveRecordStore)
76
105
 
77
106
  LlmCostTracker::Storage::ActiveRecordStore.save(event)
107
+ rescue LoadError => e
108
+ raise Error, "ActiveRecord storage requires the active_record gem: #{e.message}"
78
109
  end
79
110
 
80
- def check_budget(event)
81
- config = LlmCostTracker.configuration
82
- return unless config.monthly_budget && config.on_budget_exceeded
83
- return unless event[:cost]
111
+ def handle_storage_error(error)
112
+ case storage_error_behavior
113
+ when :ignore
114
+ nil
115
+ when :warn
116
+ log_warning("Storage failed; tracking event was not persisted: #{error.class}: #{error.message}")
117
+ when :raise
118
+ storage_error = StorageError.new(error)
119
+ raise storage_error
120
+ end
121
+ end
84
122
 
85
- monthly_total = calculate_monthly_total(event[:cost][:total_cost])
86
- return unless monthly_total > config.monthly_budget
123
+ def storage_error_behavior
124
+ behavior = (LlmCostTracker.configuration.storage_error_behavior || :warn).to_sym
125
+ return behavior if Configuration::STORAGE_ERROR_BEHAVIORS.include?(behavior)
87
126
 
88
- config.on_budget_exceeded.call(
89
- monthly_total: monthly_total,
90
- budget: config.monthly_budget,
91
- last_event: event
92
- )
127
+ raise Error,
128
+ "Unknown storage_error_behavior: #{behavior.inspect}. " \
129
+ "Use one of: #{Configuration::STORAGE_ERROR_BEHAVIORS.join(', ')}"
93
130
  end
94
131
 
95
- def calculate_monthly_total(latest_cost)
96
- # For :active_record backend, query the DB
97
- if LlmCostTracker.configuration.active_record? &&
98
- defined?(LlmCostTracker::Storage::ActiveRecordStore)
99
- LlmCostTracker::Storage::ActiveRecordStore.monthly_total + latest_cost
100
- else
101
- # For other backends, we can only report the latest cost
102
- latest_cost
103
- end
132
+ def normalized_latency_ms(latency_ms)
133
+ return nil if latency_ms.nil?
134
+
135
+ [latency_ms.to_i, 0].max
104
136
  end
105
137
  end
106
138
  end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LlmCostTracker
4
+ class UnknownPricing
5
+ class << self
6
+ def handle!(model)
7
+ model = normalized_model_name(model)
8
+
9
+ case behavior
10
+ when :ignore
11
+ nil
12
+ when :warn
13
+ warn_missing(model)
14
+ when :raise
15
+ raise UnknownPricingError.new(model: model)
16
+ end
17
+ end
18
+
19
+ private
20
+
21
+ def normalized_model_name(model)
22
+ model.to_s.empty? ? "unknown" : model.to_s
23
+ end
24
+
25
+ def warn_missing(model)
26
+ message = "[LlmCostTracker] No pricing configured for model #{model.inspect}. " \
27
+ "Cost and budget enforcement will be skipped for this event. " \
28
+ "Add a pricing_overrides entry or set unknown_pricing_behavior."
29
+
30
+ if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
31
+ Rails.logger.warn(message)
32
+ else
33
+ Kernel.warn(message)
34
+ end
35
+ end
36
+
37
+ def behavior
38
+ behavior = (LlmCostTracker.configuration.unknown_pricing_behavior || :warn).to_sym
39
+ return behavior if Configuration::UNKNOWN_PRICING_BEHAVIORS.include?(behavior)
40
+
41
+ raise Error,
42
+ "Unknown unknown_pricing_behavior: #{behavior.inspect}. " \
43
+ "Use one of: #{Configuration::UNKNOWN_PRICING_BEHAVIORS.join(', ')}"
44
+ end
45
+ end
46
+ end
47
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module LlmCostTracker
4
- VERSION = "0.1.0"
4
+ VERSION = "0.1.2"
5
5
  end