llm_cost_tracker 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/README.md +34 -14
  4. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +101 -19
  5. data/app/views/llm_cost_tracker/data_quality/index.html.erb +65 -0
  6. data/lib/llm_cost_tracker/budget.rb +85 -21
  7. data/lib/llm_cost_tracker/configuration.rb +4 -0
  8. data/lib/llm_cost_tracker/cost.rb +1 -2
  9. data/lib/llm_cost_tracker/errors.rb +22 -3
  10. data/lib/llm_cost_tracker/event.rb +4 -0
  11. data/lib/llm_cost_tracker/event_metadata.rb +21 -15
  12. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_period_totals_generator.rb +29 -0
  13. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_usage_breakdown_generator.rb +29 -0
  14. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +66 -0
  15. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_usage_breakdown_to_llm_api_calls.rb.erb +29 -0
  16. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +15 -0
  17. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +3 -1
  18. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +11 -3
  19. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +1 -0
  20. data/lib/llm_cost_tracker/middleware/faraday.rb +27 -9
  21. data/lib/llm_cost_tracker/parsed_usage.rb +16 -7
  22. data/lib/llm_cost_tracker/parsers/anthropic.rb +7 -6
  23. data/lib/llm_cost_tracker/parsers/base.rb +2 -1
  24. data/lib/llm_cost_tracker/parsers/gemini.rb +5 -2
  25. data/lib/llm_cost_tracker/parsers/openai_usage.rb +18 -5
  26. data/lib/llm_cost_tracker/period_total.rb +9 -0
  27. data/lib/llm_cost_tracker/price_registry.rb +14 -4
  28. data/lib/llm_cost_tracker/price_sync/merger.rb +1 -1
  29. data/lib/llm_cost_tracker/price_sync/raw_price.rb +3 -5
  30. data/lib/llm_cost_tracker/price_sync/sources/litellm.rb +2 -3
  31. data/lib/llm_cost_tracker/price_sync/sources/open_router.rb +2 -3
  32. data/lib/llm_cost_tracker/prices.json +30 -30
  33. data/lib/llm_cost_tracker/pricing.rb +44 -32
  34. data/lib/llm_cost_tracker/railtie.rb +2 -0
  35. data/lib/llm_cost_tracker/storage/active_record_rollups.rb +122 -0
  36. data/lib/llm_cost_tracker/storage/active_record_store.rb +38 -13
  37. data/lib/llm_cost_tracker/stream_collector.rb +5 -3
  38. data/lib/llm_cost_tracker/tags_column.rb +19 -0
  39. data/lib/llm_cost_tracker/tracker.rb +58 -32
  40. data/lib/llm_cost_tracker/unknown_pricing.rb +14 -0
  41. data/lib/llm_cost_tracker/usage_breakdown.rb +30 -0
  42. data/lib/llm_cost_tracker/version.rb +1 -1
  43. data/lib/llm_cost_tracker.rb +12 -3
  44. metadata +10 -4
  45. data/llm_cost_tracker.gemspec +0 -50
@@ -3,32 +3,31 @@
3
3
  module LlmCostTracker
4
4
  module EventMetadata
5
5
  INTERNAL_TAG_KEYS = %w[
6
- cache_creation_input_tokens
7
- cache_creation_tokens
8
6
  cache_read_input_tokens
9
- cache_read_tokens
10
- cached_input_tokens
7
+ cache_write_input_tokens
8
+ hidden_output_tokens
11
9
  input_tokens
12
10
  output_tokens
11
+ pricing_mode
13
12
  provider_response_id
14
- reasoning_tokens
15
13
  total_tokens
16
14
  ].freeze
17
15
 
18
16
  class << self
19
17
  def usage_data(input_tokens, output_tokens, metadata)
20
18
  metadata = metadata.to_h.symbolize_keys
21
- cache_read = first_integer(metadata, :cache_read_input_tokens, :cache_read_tokens)
22
- cache_creation = first_integer(metadata, :cache_creation_input_tokens, :cache_creation_tokens)
23
-
24
- {
25
- input_tokens: input_tokens.to_i,
26
- output_tokens: output_tokens.to_i,
27
- cached_input_tokens: metadata[:cached_input_tokens].to_i,
19
+ cache_read = first_integer(metadata, :cache_read_input_tokens)
20
+ cache_write = first_integer(metadata, :cache_write_input_tokens)
21
+ hidden_output = first_integer(metadata, :hidden_output_tokens)
22
+ breakdown = UsageBreakdown.build(
23
+ input_tokens: input_tokens,
24
+ output_tokens: output_tokens,
28
25
  cache_read_input_tokens: cache_read,
29
- cache_creation_input_tokens: cache_creation,
30
- total_tokens: input_tokens.to_i + output_tokens.to_i + cache_read + cache_creation
31
- }
26
+ cache_write_input_tokens: cache_write,
27
+ hidden_output_tokens: hidden_output
28
+ )
29
+
30
+ breakdown.to_h.merge(pricing_mode: normalized_pricing_mode(metadata[:pricing_mode])).compact
32
31
  end
33
32
 
34
33
  def tags(metadata)
@@ -41,6 +40,13 @@ module LlmCostTracker
41
40
  keys.each { |key| return metadata[key].to_i unless metadata[key].nil? }
42
41
  0
43
42
  end
43
+
44
+ def normalized_pricing_mode(value)
45
+ return nil if value.nil?
46
+
47
+ mode = value.to_s.strip
48
+ mode.empty? || mode == "standard" ? nil : mode
49
+ end
44
50
  end
45
51
  end
46
52
  end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rails/generators"
4
+ require "rails/generators/active_record"
5
+
6
+ module LlmCostTracker
7
+ module Generators
8
+ class AddPeriodTotalsGenerator < Rails::Generators::Base
9
+ include ActiveRecord::Generators::Migration
10
+
11
+ source_root File.expand_path("templates", __dir__)
12
+
13
+ desc "Creates a migration to add llm_cost_tracker_period_totals"
14
+
15
+ def create_migration_file
16
+ migration_template(
17
+ "add_period_totals_to_llm_cost_tracker.rb.erb",
18
+ "db/migrate/add_period_totals_to_llm_cost_tracker.rb"
19
+ )
20
+ end
21
+
22
+ private
23
+
24
+ def migration_version
25
+ "[#{ActiveRecord::VERSION::MAJOR}.#{ActiveRecord::VERSION::MINOR}]"
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rails/generators"
4
+ require "rails/generators/active_record"
5
+
6
+ module LlmCostTracker
7
+ module Generators
8
+ class AddUsageBreakdownGenerator < Rails::Generators::Base
9
+ include ActiveRecord::Generators::Migration
10
+
11
+ source_root File.expand_path("templates", __dir__)
12
+
13
+ desc "Creates a migration to add usage and cost breakdown columns to llm_api_calls"
14
+
15
+ def create_migration_file
16
+ migration_template(
17
+ "add_usage_breakdown_to_llm_api_calls.rb.erb",
18
+ "db/migrate/add_usage_breakdown_to_llm_api_calls.rb"
19
+ )
20
+ end
21
+
22
+ private
23
+
24
+ def migration_version
25
+ "[#{ActiveRecord::VERSION::MAJOR}.#{ActiveRecord::VERSION::MINOR}]"
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,66 @@
1
+ class AddPeriodTotalsToLlmCostTracker < ActiveRecord::Migration<%= migration_version %>
2
+ def up
3
+ create_table :llm_cost_tracker_period_totals do |t|
4
+ t.string :period, null: false
5
+ t.date :period_start, null: false
6
+ t.decimal :total_cost, precision: 20, scale: 8, null: false, default: 0
7
+
8
+ t.timestamps
9
+ end unless table_exists?(:llm_cost_tracker_period_totals)
10
+
11
+ add_index :llm_cost_tracker_period_totals, [:period, :period_start],
12
+ unique: true unless index_exists?(:llm_cost_tracker_period_totals, [:period, :period_start])
13
+
14
+ backfill_period_totals
15
+ end
16
+
17
+ def down
18
+ remove_index :llm_cost_tracker_period_totals, [:period, :period_start] if index_exists?(:llm_cost_tracker_period_totals, [:period, :period_start])
19
+ drop_table :llm_cost_tracker_period_totals if table_exists?(:llm_cost_tracker_period_totals)
20
+ end
21
+
22
+ private
23
+
24
+ def backfill_period_totals
25
+ return unless table_exists?(:llm_api_calls)
26
+
27
+ backfill_period_total("day", day_bucket_sql)
28
+ backfill_period_total("month", month_bucket_sql)
29
+ end
30
+
31
+ def backfill_period_total(period, bucket_sql)
32
+ execute <<~SQL
33
+ INSERT INTO llm_cost_tracker_period_totals (period, period_start, total_cost, created_at, updated_at)
34
+ SELECT #{connection.quote(period)} AS period,
35
+ #{bucket_sql} AS period_start,
36
+ SUM(total_cost) AS total_cost,
37
+ CURRENT_TIMESTAMP,
38
+ CURRENT_TIMESTAMP
39
+ FROM llm_api_calls
40
+ WHERE total_cost IS NOT NULL
41
+ GROUP BY #{bucket_sql}
42
+ SQL
43
+ end
44
+
45
+ def day_bucket_sql
46
+ case connection.adapter_name
47
+ when /postgres/i
48
+ "DATE_TRUNC('day', tracked_at)::date"
49
+ when /mysql/i
50
+ "DATE(tracked_at)"
51
+ else
52
+ "date(tracked_at)"
53
+ end
54
+ end
55
+
56
+ def month_bucket_sql
57
+ case connection.adapter_name
58
+ when /postgres/i
59
+ "DATE_TRUNC('month', tracked_at)::date"
60
+ when /mysql/i
61
+ "DATE_FORMAT(tracked_at, '%Y-%m-01')"
62
+ else
63
+ "strftime('%Y-%m-01', tracked_at)"
64
+ end
65
+ end
66
+ end
@@ -0,0 +1,29 @@
1
+ class AddUsageBreakdownToLlmApiCalls < ActiveRecord::Migration<%= migration_version %>
2
+ def up
3
+ unless column_exists?(:llm_api_calls, :cache_read_input_tokens)
4
+ add_column :llm_api_calls, :cache_read_input_tokens, :integer, null: false, default: 0
5
+ end
6
+ unless column_exists?(:llm_api_calls, :cache_write_input_tokens)
7
+ add_column :llm_api_calls, :cache_write_input_tokens, :integer, null: false, default: 0
8
+ end
9
+ unless column_exists?(:llm_api_calls, :hidden_output_tokens)
10
+ add_column :llm_api_calls, :hidden_output_tokens, :integer, null: false, default: 0
11
+ end
12
+ unless column_exists?(:llm_api_calls, :cache_read_input_cost)
13
+ add_column :llm_api_calls, :cache_read_input_cost, :decimal, precision: 20, scale: 8
14
+ end
15
+ unless column_exists?(:llm_api_calls, :cache_write_input_cost)
16
+ add_column :llm_api_calls, :cache_write_input_cost, :decimal, precision: 20, scale: 8
17
+ end
18
+ add_column :llm_api_calls, :pricing_mode, :string unless column_exists?(:llm_api_calls, :pricing_mode)
19
+ end
20
+
21
+ def down
22
+ remove_column :llm_api_calls, :pricing_mode if column_exists?(:llm_api_calls, :pricing_mode)
23
+ remove_column :llm_api_calls, :cache_write_input_cost if column_exists?(:llm_api_calls, :cache_write_input_cost)
24
+ remove_column :llm_api_calls, :cache_read_input_cost if column_exists?(:llm_api_calls, :cache_read_input_cost)
25
+ remove_column :llm_api_calls, :hidden_output_tokens if column_exists?(:llm_api_calls, :hidden_output_tokens)
26
+ remove_column :llm_api_calls, :cache_write_input_tokens if column_exists?(:llm_api_calls, :cache_write_input_tokens)
27
+ remove_column :llm_api_calls, :cache_read_input_tokens if column_exists?(:llm_api_calls, :cache_read_input_tokens)
28
+ end
29
+ end
@@ -6,13 +6,19 @@ class CreateLlmApiCalls < ActiveRecord::Migration<%= migration_version %>
6
6
  t.integer :input_tokens, null: false, default: 0
7
7
  t.integer :output_tokens, null: false, default: 0
8
8
  t.integer :total_tokens, null: false, default: 0
9
+ t.integer :cache_read_input_tokens, null: false, default: 0
10
+ t.integer :cache_write_input_tokens, null: false, default: 0
11
+ t.integer :hidden_output_tokens, null: false, default: 0
9
12
  t.decimal :input_cost, precision: 20, scale: 8
13
+ t.decimal :cache_read_input_cost, precision: 20, scale: 8
14
+ t.decimal :cache_write_input_cost, precision: 20, scale: 8
10
15
  t.decimal :output_cost, precision: 20, scale: 8
11
16
  t.decimal :total_cost, precision: 20, scale: 8
12
17
  t.integer :latency_ms
13
18
  t.boolean :stream, null: false, default: false
14
19
  t.string :usage_source
15
20
  t.string :provider_response_id
21
+ t.string :pricing_mode
16
22
  if postgresql?
17
23
  t.jsonb :tags, null: false, default: {}
18
24
  else
@@ -23,6 +29,14 @@ class CreateLlmApiCalls < ActiveRecord::Migration<%= migration_version %>
23
29
  t.timestamps
24
30
  end
25
31
 
32
+ create_table :llm_cost_tracker_period_totals do |t|
33
+ t.string :period, null: false
34
+ t.date :period_start, null: false
35
+ t.decimal :total_cost, precision: 20, scale: 8, null: false, default: 0
36
+
37
+ t.timestamps
38
+ end
39
+
26
40
  add_index :llm_api_calls, :provider
27
41
  add_index :llm_api_calls, :model
28
42
  add_index :llm_api_calls, :tracked_at
@@ -31,6 +45,7 @@ class CreateLlmApiCalls < ActiveRecord::Migration<%= migration_version %>
31
45
  add_index :llm_api_calls, :usage_source
32
46
  add_index :llm_api_calls, :provider_response_id
33
47
  add_index :llm_api_calls, :tags, using: :gin if postgresql?
48
+ add_index :llm_cost_tracker_period_totals, [:period, :period_start], unique: true
34
49
  end
35
50
 
36
51
  private
@@ -12,6 +12,8 @@ LlmCostTracker.configure do |config|
12
12
 
13
13
  # Monthly budget in USD. Set to nil to disable budget alerts.
14
14
  # config.monthly_budget = 100.00
15
+ # config.daily_budget = 10.00
16
+ # config.per_call_budget = 1.00
15
17
  # config.budget_exceeded_behavior = :notify # :notify, :raise, or :block_requests
16
18
 
17
19
  # What to do when storage fails.
@@ -23,7 +25,7 @@ LlmCostTracker.configure do |config|
23
25
  # Callback when monthly budget is exceeded.
24
26
  # config.on_budget_exceeded = ->(data) {
25
27
  # Rails.logger.warn "[LlmCostTracker] Budget exceeded! " \
26
- # "Monthly total: $#{data[:monthly_total]}, Budget: $#{data[:budget]}"
28
+ # "#{data[:budget_type]} total: $#{data[:total]}, Budget: $#{data[:budget]}"
27
29
  # # Or send a Slack notification, email, etc.
28
30
  # }
29
31
 
@@ -8,9 +8,9 @@
8
8
  # Supported price keys:
9
9
  # - input
10
10
  # - output
11
- # - cached_input
12
11
  # - cache_read_input
13
- # - cache_creation_input
12
+ # - cache_write_input
13
+ # - mode_input / mode_output / mode_cache_read_input / mode_cache_write_input
14
14
  #
15
15
  # Optional metadata keys, ignored by cost calculation:
16
16
  # - _source
@@ -24,10 +24,18 @@
24
24
  # models:
25
25
  # "ft:gpt-4o-mini:my-org":
26
26
  # input: 0.30
27
- # cached_input: 0.15
27
+ # cache_read_input: 0.15
28
28
  # output: 1.20
29
29
  # _notes: "Internal fine-tune rate"
30
30
  #
31
+ # Example: alternate pricing mode
32
+ # models:
33
+ # "batchable-model":
34
+ # input: 1.00
35
+ # output: 2.00
36
+ # batch_input: 0.50
37
+ # batch_output: 1.00
38
+ #
31
39
  # Example: negotiated provider discount
32
40
  # models:
33
41
  # "gpt-4o":
@@ -8,6 +8,7 @@ class UpgradeLlmApiCallTagsToJsonb < ActiveRecord::Migration<%= migration_versio
8
8
  return if tags_jsonb?
9
9
 
10
10
  remove_index :llm_api_calls, :tags if index_exists?(:llm_api_calls, :tags)
11
+ say "Upgrading llm_api_calls.tags to jsonb rewrites the table on PostgreSQL. Run this migration during a maintenance window on large datasets."
11
12
 
12
13
  change_column(
13
14
  :llm_api_calls,
@@ -8,6 +8,8 @@ require_relative "../logging"
8
8
  module LlmCostTracker
9
9
  module Middleware
10
10
  class Faraday < ::Faraday::Middleware
11
+ STREAM_CAPTURE_LIMIT_BYTES = 1_048_576
12
+
11
13
  def initialize(app, **options)
12
14
  super(app)
13
15
  @tags = options.fetch(:tags, {})
@@ -85,15 +87,12 @@ module LlmCostTracker
85
87
  end
86
88
 
87
89
  def parse_stream(parser, request_url, request_body, response_env, stream_buffer)
88
- body = stream_buffer&.string
90
+ body = stream_buffer&.dig(:buffer)&.string
89
91
  body = read_body(response_env.body) if body.nil? || body.empty?
90
92
 
91
93
  if body.nil? || body.empty?
92
- Logging.warn(
93
- "Unable to capture streaming response for #{request_url}; " \
94
- "fall back to LlmCostTracker.track_stream for manual capture."
95
- )
96
- return nil
94
+ Logging.warn(capture_warning(request_url, stream_buffer))
95
+ return parser.parse_stream(request_url, request_body, response_env.status, [])
97
96
  end
98
97
 
99
98
  events = Parsers::SSE.parse(body)
@@ -106,12 +105,21 @@ module LlmCostTracker
106
105
  original = request_env.request.on_data
107
106
  return nil unless original
108
107
 
109
- buffer = StringIO.new
108
+ state = { buffer: StringIO.new, bytes: 0, overflowed: false }
110
109
  request_env.request.on_data = proc do |chunk, size, env|
111
- buffer << chunk.to_s
110
+ chunk = chunk.to_s
111
+ unless state[:overflowed]
112
+ if state[:bytes] + chunk.bytesize <= STREAM_CAPTURE_LIMIT_BYTES
113
+ state[:buffer] << chunk
114
+ state[:bytes] += chunk.bytesize
115
+ else
116
+ state[:overflowed] = true
117
+ state[:buffer] = nil
118
+ end
119
+ end
112
120
  original.call(chunk, size, env)
113
121
  end
114
- buffer
122
+ state
115
123
  rescue StandardError => e
116
124
  Logging.warn("Unable to install streaming tap: #{e.class}: #{e.message}")
117
125
  nil
@@ -145,6 +153,16 @@ module LlmCostTracker
145
153
  def elapsed_ms(started_at)
146
154
  ((monotonic_time - started_at) * 1000).round
147
155
  end
156
+
157
+ def capture_warning(request_url, stream_buffer)
158
+ unless stream_buffer&.dig(:overflowed)
159
+ return "Unable to capture streaming response for #{request_url}; " \
160
+ "recording usage_source=unknown. Use LlmCostTracker.track_stream for manual capture."
161
+ end
162
+
163
+ "Streaming response for #{request_url} exceeded #{STREAM_CAPTURE_LIMIT_BYTES} bytes; " \
164
+ "recording usage_source=unknown. Use LlmCostTracker.track_stream for manual capture."
165
+ end
148
166
  end
149
167
  end
150
168
  end
@@ -7,10 +7,9 @@ module LlmCostTracker
7
7
  :input_tokens,
8
8
  :output_tokens,
9
9
  :total_tokens,
10
- :cached_input_tokens,
11
10
  :cache_read_input_tokens,
12
- :cache_creation_input_tokens,
13
- :reasoning_tokens,
11
+ :cache_write_input_tokens,
12
+ :hidden_output_tokens,
14
13
  :stream,
15
14
  :usage_source,
16
15
  :provider_response_id
@@ -34,11 +33,10 @@ module LlmCostTracker
34
33
  model: attributes.fetch(:model),
35
34
  input_tokens: attributes.fetch(:input_tokens).to_i,
36
35
  output_tokens: attributes.fetch(:output_tokens).to_i,
37
- total_tokens: attributes.fetch(:total_tokens, 0).to_i,
38
- cached_input_tokens: attributes[:cached_input_tokens],
36
+ total_tokens: attributes.fetch(:total_tokens, usage_breakdown(attributes).total_tokens).to_i,
39
37
  cache_read_input_tokens: attributes[:cache_read_input_tokens],
40
- cache_creation_input_tokens: attributes[:cache_creation_input_tokens],
41
- reasoning_tokens: attributes[:reasoning_tokens],
38
+ cache_write_input_tokens: attributes[:cache_write_input_tokens],
39
+ hidden_output_tokens: attributes[:hidden_output_tokens],
42
40
  stream: attributes[:stream] || false,
43
41
  usage_source: attributes[:usage_source],
44
42
  provider_response_id: attributes[:provider_response_id]
@@ -52,5 +50,16 @@ module LlmCostTracker
52
50
  def to_h
53
51
  super.compact
54
52
  end
53
+
54
+ def self.usage_breakdown(attributes)
55
+ UsageBreakdown.build(
56
+ input_tokens: attributes.fetch(:input_tokens),
57
+ output_tokens: attributes.fetch(:output_tokens),
58
+ cache_read_input_tokens: attributes[:cache_read_input_tokens],
59
+ cache_write_input_tokens: attributes[:cache_write_input_tokens],
60
+ hidden_output_tokens: attributes[:hidden_output_tokens]
61
+ )
62
+ end
63
+ private_class_method :usage_breakdown
55
64
  end
56
65
  end
@@ -28,6 +28,8 @@ module LlmCostTracker
28
28
  return nil unless usage
29
29
 
30
30
  request = safe_json_parse(request_body)
31
+ cache_read = usage["cache_read_input_tokens"].to_i
32
+ cache_write = usage["cache_creation_input_tokens"].to_i
31
33
 
32
34
  ParsedUsage.build(
33
35
  provider: "anthropic",
@@ -35,10 +37,9 @@ module LlmCostTracker
35
37
  model: response["model"] || request["model"],
36
38
  input_tokens: usage["input_tokens"].to_i,
37
39
  output_tokens: usage["output_tokens"].to_i,
38
- total_tokens: usage["input_tokens"].to_i + usage["output_tokens"].to_i +
39
- usage["cache_read_input_tokens"].to_i + usage["cache_creation_input_tokens"].to_i,
40
+ total_tokens: usage["input_tokens"].to_i + usage["output_tokens"].to_i + cache_read + cache_write,
40
41
  cache_read_input_tokens: usage["cache_read_input_tokens"],
41
- cache_creation_input_tokens: usage["cache_creation_input_tokens"],
42
+ cache_write_input_tokens: usage["cache_creation_input_tokens"],
42
43
  usage_source: :response
43
44
  )
44
45
  end
@@ -105,7 +106,7 @@ module LlmCostTracker
105
106
  input = usage["input_tokens"].to_i
106
107
  output = usage["output_tokens"].to_i
107
108
  cache_read = usage["cache_read_input_tokens"].to_i
108
- cache_creation = usage["cache_creation_input_tokens"].to_i
109
+ cache_write = usage["cache_creation_input_tokens"].to_i
109
110
 
110
111
  ParsedUsage.build(
111
112
  provider: "anthropic",
@@ -113,9 +114,9 @@ module LlmCostTracker
113
114
  model: model,
114
115
  input_tokens: input,
115
116
  output_tokens: output,
116
- total_tokens: input + output + cache_read + cache_creation,
117
+ total_tokens: input + output + cache_read + cache_write,
117
118
  cache_read_input_tokens: usage["cache_read_input_tokens"],
118
- cache_creation_input_tokens: usage["cache_creation_input_tokens"],
119
+ cache_write_input_tokens: usage["cache_creation_input_tokens"],
119
120
  stream: true,
120
121
  usage_source: :stream_final
121
122
  )
@@ -23,7 +23,8 @@ module LlmCostTracker
23
23
  body = request_body.to_s
24
24
  return false if body.empty?
25
25
 
26
- body.include?('"stream":true') || body.include?('"stream": true') || body.include?("stream: true")
26
+ request = safe_json_parse(body)
27
+ request.is_a?(Hash) && request["stream"] == true
27
28
  end
28
29
 
29
30
  def parse_stream(_request_url, _request_body, _response_status, _events)
@@ -74,13 +74,16 @@ module LlmCostTracker
74
74
  private
75
75
 
76
76
  def build_parsed_usage(request_url, usage, usage_source:, stream: false, provider_response_id: nil)
77
+ cache_read = usage["cachedContentTokenCount"].to_i
78
+
77
79
  ParsedUsage.build(
78
80
  provider: "gemini",
79
81
  model: extract_model_from_url(request_url),
80
- input_tokens: usage["promptTokenCount"].to_i,
82
+ input_tokens: [usage["promptTokenCount"].to_i - cache_read, 0].max,
81
83
  output_tokens: output_tokens(usage),
82
84
  total_tokens: usage["totalTokenCount"].to_i,
83
- cached_input_tokens: usage["cachedContentTokenCount"],
85
+ cache_read_input_tokens: usage["cachedContentTokenCount"],
86
+ hidden_output_tokens: usage["thoughtsTokenCount"],
84
87
  stream: stream,
85
88
  usage_source: usage_source,
86
89
  provider_response_id: provider_response_id
@@ -13,15 +13,17 @@ module LlmCostTracker
13
13
  return nil unless usage
14
14
 
15
15
  request = safe_json_parse(request_body)
16
+ cache_read = cache_read_input_tokens(usage)
16
17
 
17
18
  ParsedUsage.build(
18
19
  provider: provider_for(request_url),
19
20
  provider_response_id: response["id"],
20
21
  model: response["model"] || request["model"],
21
- input_tokens: (usage["prompt_tokens"] || usage["input_tokens"]).to_i,
22
+ input_tokens: regular_input_tokens(usage, cache_read),
22
23
  output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
23
24
  total_tokens: usage["total_tokens"].to_i,
24
- cached_input_tokens: cached_input_tokens(usage),
25
+ cache_read_input_tokens: cache_read,
26
+ hidden_output_tokens: hidden_output_tokens(usage),
25
27
  usage_source: :response
26
28
  )
27
29
  end
@@ -34,14 +36,16 @@ module LlmCostTracker
34
36
  usage = detect_stream_usage(events)
35
37
 
36
38
  if usage
39
+ cache_read = cache_read_input_tokens(usage)
37
40
  ParsedUsage.build(
38
41
  provider: provider_for(request_url),
39
42
  provider_response_id: detect_stream_response_id(events),
40
43
  model: model,
41
- input_tokens: (usage["prompt_tokens"] || usage["input_tokens"]).to_i,
44
+ input_tokens: regular_input_tokens(usage, cache_read),
42
45
  output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
43
46
  total_tokens: usage["total_tokens"].to_i,
44
- cached_input_tokens: cached_input_tokens(usage),
47
+ cache_read_input_tokens: cache_read,
48
+ hidden_output_tokens: hidden_output_tokens(usage),
45
49
  stream: true,
46
50
  usage_source: :stream_final
47
51
  )
@@ -92,10 +96,19 @@ module LlmCostTracker
92
96
  nil
93
97
  end
94
98
 
95
- def cached_input_tokens(usage)
99
+ def regular_input_tokens(usage, cache_read)
100
+ [(usage["prompt_tokens"] || usage["input_tokens"]).to_i - cache_read.to_i, 0].max
101
+ end
102
+
103
+ def cache_read_input_tokens(usage)
96
104
  details = usage["prompt_tokens_details"] || usage["input_tokens_details"] || {}
97
105
  details["cached_tokens"]
98
106
  end
107
+
108
+ def hidden_output_tokens(usage)
109
+ details = usage["completion_tokens_details"] || usage["output_tokens_details"] || {}
110
+ details["reasoning_tokens"]
111
+ end
99
112
  end
100
113
  end
101
114
  end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_record"
4
+
5
+ module LlmCostTracker
6
+ class PeriodTotal < ActiveRecord::Base
7
+ self.table_name = "llm_cost_tracker_period_totals"
8
+ end
9
+ end
@@ -10,7 +10,7 @@ module LlmCostTracker
10
10
  module PriceRegistry
11
11
  DEFAULT_PRICES_PATH = File.expand_path("prices.json", __dir__)
12
12
  EMPTY_PRICES = {}.freeze
13
- PRICE_KEYS = %w[input cached_input output cache_read_input cache_creation_input].freeze
13
+ PRICE_KEYS = %w[input output cache_read_input cache_write_input].freeze
14
14
  METADATA_KEYS = %w[_source _source_version _fetched_at _updated _notes _validator_override].freeze
15
15
  MUTEX = Monitor.new
16
16
 
@@ -60,7 +60,7 @@ module LlmCostTracker
60
60
  def normalize_price_entry(price)
61
61
  price.each_with_object({}) do |(key, value), normalized|
62
62
  key = key.to_s
63
- normalized[key.to_sym] = Float(value) if PRICE_KEYS.include?(key)
63
+ normalized[key.to_sym] = Float(value) if price_key?(key)
64
64
  end
65
65
  end
66
66
 
@@ -80,15 +80,25 @@ module LlmCostTracker
80
80
  end
81
81
 
82
82
  def warn_unknown_keys(model, price, path)
83
- unknown_keys = price.keys.map(&:to_s) - PRICE_KEYS - METADATA_KEYS
83
+ unknown_keys = price.keys.map(&:to_s).reject do |key|
84
+ price_key?(key) || METADATA_KEYS.include?(key)
85
+ end
84
86
  return if unknown_keys.empty?
85
87
 
86
88
  Logging.warn(
87
89
  "Unknown price keys #{unknown_keys.inspect} for #{model.inspect} in #{path}; " \
88
- "ignored. Known keys: #{(PRICE_KEYS + METADATA_KEYS).inspect}"
90
+ "ignored. Known keys: #{(PRICE_KEYS + METADATA_KEYS).inspect}; mode-specific keys use mode_input"
89
91
  )
90
92
  end
91
93
 
94
+ def price_key?(key)
95
+ return true if PRICE_KEYS.include?(key)
96
+
97
+ PRICE_KEYS.any? do |base_key|
98
+ key.end_with?("_#{base_key}") && key.delete_suffix("_#{base_key}") != ""
99
+ end
100
+ end
101
+
92
102
  def load_price_file(path)
93
103
  contents = File.read(path)
94
104
  return YAML.safe_load(contents, aliases: false) || {} if yaml_file?(path)
@@ -6,7 +6,7 @@ module LlmCostTracker
6
6
  Discrepancy = Data.define(:model, :field, :values)
7
7
 
8
8
  PRIORITY_ORDER = %i[litellm openrouter].freeze
9
- SUPPLEMENTAL_FIELDS = %i[cached_input cache_read_input cache_creation_input].freeze
9
+ SUPPLEMENTAL_FIELDS = %i[cache_read_input cache_write_input].freeze
10
10
 
11
11
  def merge(results_by_source)
12
12
  prices = collect_prices(results_by_source)
@@ -7,24 +7,22 @@ module LlmCostTracker
7
7
  :provider,
8
8
  :input,
9
9
  :output,
10
- :cached_input,
11
10
  :cache_read_input,
12
- :cache_creation_input,
11
+ :cache_write_input,
13
12
  :source,
14
13
  :source_version,
15
14
  :fetched_at
16
15
  )
17
16
 
18
17
  class RawPrice
19
- PRICE_FIELDS = %w[input output cached_input cache_read_input cache_creation_input].freeze
18
+ PRICE_FIELDS = %w[input output cache_read_input cache_write_input].freeze
20
19
 
21
20
  def to_registry_entry(today:)
22
21
  {
23
22
  "input" => input,
24
23
  "output" => output,
25
- "cached_input" => cached_input,
26
24
  "cache_read_input" => cache_read_input,
27
- "cache_creation_input" => cache_creation_input,
25
+ "cache_write_input" => cache_write_input,
28
26
  "_source" => source.to_s,
29
27
  "_source_version" => source_version,
30
28
  "_fetched_at" => fetched_at || today.iso8601