llm_cost_tracker 0.7.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. checksums.yaml +4 -4
  2. data/.ruby-version +1 -0
  3. data/CHANGELOG.md +72 -1
  4. data/README.md +58 -221
  5. data/app/assets/llm_cost_tracker/application.css +218 -41
  6. data/app/controllers/llm_cost_tracker/application_controller.rb +30 -17
  7. data/app/controllers/llm_cost_tracker/assets_controller.rb +11 -1
  8. data/app/controllers/llm_cost_tracker/calls_controller.rb +19 -14
  9. data/app/controllers/llm_cost_tracker/data_quality_controller.rb +10 -2
  10. data/app/helpers/llm_cost_tracker/application_helper.rb +11 -24
  11. data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +3 -21
  12. data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +4 -4
  13. data/app/helpers/llm_cost_tracker/dashboard_query_helper.rb +1 -1
  14. data/app/helpers/llm_cost_tracker/token_usage_helper.rb +20 -7
  15. data/app/models/llm_cost_tracker/call.rb +169 -0
  16. data/app/models/llm_cost_tracker/call_line_item.rb +22 -0
  17. data/app/models/llm_cost_tracker/call_rollup.rb +9 -0
  18. data/app/models/llm_cost_tracker/call_tag.rb +16 -0
  19. data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +13 -0
  20. data/app/models/llm_cost_tracker/ingestion/lease.rb +1 -1
  21. data/app/models/llm_cost_tracker/provider_invoice.rb +9 -0
  22. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +125 -34
  23. data/app/services/llm_cost_tracker/dashboard/date_range.rb +1 -1
  24. data/app/services/llm_cost_tracker/dashboard/filter.rb +2 -2
  25. data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +74 -21
  26. data/app/services/llm_cost_tracker/dashboard/pagination.rb +6 -4
  27. data/app/services/llm_cost_tracker/dashboard/params.rb +8 -2
  28. data/app/services/llm_cost_tracker/dashboard/provider_breakdown.rb +1 -1
  29. data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +4 -3
  30. data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +42 -9
  31. data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +14 -37
  32. data/app/services/llm_cost_tracker/dashboard/time_series.rb +1 -1
  33. data/app/services/llm_cost_tracker/dashboard/top_models.rb +1 -1
  34. data/app/views/llm_cost_tracker/calls/index.html.erb +33 -75
  35. data/app/views/llm_cost_tracker/calls/show.html.erb +62 -7
  36. data/app/views/llm_cost_tracker/dashboard/index.html.erb +9 -50
  37. data/app/views/llm_cost_tracker/data_quality/index.html.erb +103 -126
  38. data/app/views/llm_cost_tracker/errors/database.html.erb +1 -1
  39. data/app/views/llm_cost_tracker/models/index.html.erb +18 -50
  40. data/app/views/llm_cost_tracker/shared/_filters.html.erb +63 -0
  41. data/app/views/llm_cost_tracker/shared/_sort.html.erb +13 -0
  42. data/app/views/llm_cost_tracker/shared/setup_required.html.erb +1 -1
  43. data/app/views/llm_cost_tracker/tags/index.html.erb +3 -34
  44. data/app/views/llm_cost_tracker/tags/show.html.erb +5 -37
  45. data/lib/llm_cost_tracker/billing/components.rb +53 -0
  46. data/lib/llm_cost_tracker/billing/components.yml +117 -0
  47. data/lib/llm_cost_tracker/billing/cost_status.rb +45 -0
  48. data/lib/llm_cost_tracker/billing/line_item.rb +189 -0
  49. data/lib/llm_cost_tracker/budget.rb +23 -35
  50. data/lib/llm_cost_tracker/capture/stream_collector.rb +47 -33
  51. data/lib/llm_cost_tracker/configuration.rb +36 -19
  52. data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +54 -0
  53. data/lib/llm_cost_tracker/doctor/ingestion_check.rb +24 -32
  54. data/lib/llm_cost_tracker/doctor/legacy_audit_check.rb +36 -0
  55. data/lib/llm_cost_tracker/doctor/legacy_billing_status_check.rb +22 -0
  56. data/lib/llm_cost_tracker/doctor/price_check.rb +2 -2
  57. data/lib/llm_cost_tracker/doctor/pricing_snapshot_drift_check.rb +85 -0
  58. data/lib/llm_cost_tracker/doctor/probe.rb +17 -0
  59. data/lib/llm_cost_tracker/doctor/schema_check.rb +31 -0
  60. data/lib/llm_cost_tracker/doctor.rb +43 -45
  61. data/lib/llm_cost_tracker/errors.rb +5 -19
  62. data/lib/llm_cost_tracker/event.rb +10 -2
  63. data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +4 -2
  64. data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +2 -6
  65. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +157 -0
  66. data/lib/llm_cost_tracker/ingestion/batch.rb +11 -12
  67. data/lib/llm_cost_tracker/ingestion/inbox.rb +39 -23
  68. data/lib/llm_cost_tracker/ingestion/worker.rb +14 -5
  69. data/lib/llm_cost_tracker/ingestion.rb +28 -22
  70. data/lib/llm_cost_tracker/integrations/anthropic.rb +45 -38
  71. data/lib/llm_cost_tracker/integrations/base.rb +36 -29
  72. data/lib/llm_cost_tracker/integrations/openai.rb +85 -40
  73. data/lib/llm_cost_tracker/integrations/ruby_llm.rb +5 -5
  74. data/lib/llm_cost_tracker/integrations.rb +2 -2
  75. data/lib/llm_cost_tracker/ledger/period/totals.rb +12 -9
  76. data/lib/llm_cost_tracker/ledger/period.rb +5 -5
  77. data/lib/llm_cost_tracker/ledger/rollups/upsert_sql.rb +4 -10
  78. data/lib/llm_cost_tracker/ledger/rollups.rb +76 -25
  79. data/lib/llm_cost_tracker/ledger/schema/adapter.rb +18 -0
  80. data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +50 -0
  81. data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +37 -0
  82. data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +26 -0
  83. data/lib/llm_cost_tracker/ledger/schema/calls.rb +34 -23
  84. data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +57 -0
  85. data/lib/llm_cost_tracker/ledger/store.rb +110 -18
  86. data/lib/llm_cost_tracker/ledger/tags/query.rb +5 -11
  87. data/lib/llm_cost_tracker/ledger/tags/sql.rb +27 -14
  88. data/lib/llm_cost_tracker/ledger.rb +4 -2
  89. data/lib/llm_cost_tracker/logging.rb +2 -5
  90. data/lib/llm_cost_tracker/middleware/faraday.rb +7 -6
  91. data/lib/llm_cost_tracker/parsers/anthropic.rb +52 -7
  92. data/lib/llm_cost_tracker/parsers/base.rb +8 -3
  93. data/lib/llm_cost_tracker/parsers/gemini.rb +101 -15
  94. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +10 -2
  95. data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +87 -0
  96. data/lib/llm_cost_tracker/parsers/openai_usage.rb +48 -21
  97. data/lib/llm_cost_tracker/parsers/sse.rb +1 -1
  98. data/lib/llm_cost_tracker/parsers.rb +1 -1
  99. data/lib/llm_cost_tracker/prices.json +105 -20
  100. data/lib/llm_cost_tracker/pricing/effective_prices.rb +57 -19
  101. data/lib/llm_cost_tracker/pricing/explainer.rb +4 -5
  102. data/lib/llm_cost_tracker/pricing/lookup.rb +38 -34
  103. data/lib/llm_cost_tracker/pricing/registry.rb +65 -45
  104. data/lib/llm_cost_tracker/pricing/service_charges.rb +204 -0
  105. data/lib/llm_cost_tracker/pricing/sync/fetcher.rb +26 -17
  106. data/lib/llm_cost_tracker/pricing/sync/registry_diff.rb +6 -15
  107. data/lib/llm_cost_tracker/pricing/sync.rb +57 -10
  108. data/lib/llm_cost_tracker/pricing/sync_change_printer.rb +32 -0
  109. data/lib/llm_cost_tracker/pricing.rb +190 -26
  110. data/lib/llm_cost_tracker/railtie.rb +0 -8
  111. data/lib/llm_cost_tracker/report/data.rb +16 -8
  112. data/lib/llm_cost_tracker/report.rb +0 -4
  113. data/lib/llm_cost_tracker/retention.rb +8 -8
  114. data/lib/llm_cost_tracker/tags/context.rb +2 -4
  115. data/lib/llm_cost_tracker/tags/key.rb +4 -0
  116. data/lib/llm_cost_tracker/tags/sanitizer.rb +12 -17
  117. data/lib/llm_cost_tracker/timing.rb +15 -0
  118. data/lib/llm_cost_tracker/token_usage.rb +56 -42
  119. data/lib/llm_cost_tracker/tracker.rb +67 -24
  120. data/lib/llm_cost_tracker/usage_capture.rb +29 -8
  121. data/lib/llm_cost_tracker/version.rb +1 -1
  122. data/lib/llm_cost_tracker.rb +36 -35
  123. data/lib/tasks/llm_cost_tracker.rake +22 -17
  124. metadata +36 -41
  125. data/app/models/llm_cost_tracker/ingestion/event.rb +0 -13
  126. data/app/models/llm_cost_tracker/ledger/call.rb +0 -45
  127. data/app/models/llm_cost_tracker/ledger/call_metrics.rb +0 -66
  128. data/app/models/llm_cost_tracker/ledger/period/grouping.rb +0 -71
  129. data/app/models/llm_cost_tracker/ledger/period/total.rb +0 -13
  130. data/app/models/llm_cost_tracker/ledger/tags/accessors.rb +0 -19
  131. data/lib/llm_cost_tracker/configuration/instrumentation.rb +0 -33
  132. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_ingestion_generator.rb +0 -29
  133. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +0 -29
  134. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_period_totals_generator.rb +0 -29
  135. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_provider_response_id_generator.rb +0 -29
  136. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_streaming_generator.rb +0 -29
  137. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_token_usage_generator.rb +0 -42
  138. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_ingestion_to_llm_cost_tracker.rb.erb +0 -33
  139. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +0 -9
  140. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +0 -104
  141. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_provider_response_id_to_llm_api_calls.rb.erb +0 -15
  142. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_streaming_to_llm_api_calls.rb.erb +0 -21
  143. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_token_usage_to_llm_api_calls.rb.erb +0 -22
  144. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +0 -83
  145. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +0 -26
  146. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +0 -44
  147. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +0 -29
  148. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +0 -29
  149. data/lib/llm_cost_tracker/ledger/rollups/batch.rb +0 -43
  150. data/lib/llm_cost_tracker/ledger/schema/period_totals.rb +0 -32
  151. data/lib/llm_cost_tracker/pricing/components.rb +0 -37
  152. data/lib/llm_cost_tracker/pricing/sync/registry_loader.rb +0 -63
@@ -1,8 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "openai_service_charges"
4
+
3
5
  module LlmCostTracker
4
6
  module Parsers
5
7
  module OpenaiUsage
8
+ include OpenaiServiceCharges
9
+
10
+ OPENAI_DATA_RESIDENCY_HOST_PATTERN = /\A[a-z]{2,3}\.api\.openai\.com\z/
11
+
6
12
  private
7
13
 
8
14
  def parse_openai_usage(request_url:, request_body:, response_status:, response_body:)
@@ -27,7 +33,8 @@ module LlmCostTracker
27
33
  ),
28
34
  model: model,
29
35
  token_usage: token_usage(usage: usage, cache_read: cache_read),
30
- usage_source: :response
36
+ usage_source: :response,
37
+ service_line_items: openai_service_line_items(response)
31
38
  )
32
39
  end
33
40
 
@@ -35,8 +42,7 @@ module LlmCostTracker
35
42
  return nil unless response_status == 200
36
43
 
37
44
  request = safe_json_parse(request_body)
38
- model =
39
- find_event_value(events) { |data| data["model"] || data.dig("response", "model") } || request["model"]
45
+ model = find_event_value(events) { |data| data["model"] || data.dig("response", "model") } || request["model"]
40
46
  usage = detect_stream_usage(events)
41
47
  response_id = find_event_value(events) { |data| data["id"] || data.dig("response", "id") }
42
48
  pricing_mode = pricing_mode(
@@ -44,6 +50,7 @@ module LlmCostTracker
44
50
  model: model,
45
51
  service_tier: stream_pricing_mode(events) || request["service_tier"]
46
52
  )
53
+ service_line_items = openai_stream_service_line_items(events)
47
54
 
48
55
  if usage
49
56
  cache_read = cache_read_input_tokens(usage)
@@ -54,14 +61,16 @@ module LlmCostTracker
54
61
  model: model,
55
62
  token_usage: token_usage(usage: usage, cache_read: cache_read),
56
63
  stream: true,
57
- usage_source: :stream_final
64
+ usage_source: :stream_final,
65
+ service_line_items: service_line_items
58
66
  )
59
67
  else
60
68
  build_unknown_stream_usage(
61
69
  provider: provider_for(request_url),
62
70
  model: model,
63
71
  provider_response_id: response_id,
64
- pricing_mode: pricing_mode
72
+ pricing_mode: pricing_mode,
73
+ service_line_items: service_line_items
65
74
  )
66
75
  end
67
76
  end
@@ -88,7 +97,7 @@ module LlmCostTracker
88
97
 
89
98
  def openai_regional_processing?(request_url:, model:)
90
99
  uri = parsed_uri(request_url)
91
- return false unless %w[us.api.openai.com eu.api.openai.com].include?(uri&.host.to_s.downcase)
100
+ return false unless uri&.host.to_s.downcase.match?(OPENAI_DATA_RESIDENCY_HOST_PATTERN)
92
101
 
93
102
  openai_data_residency_model?(model)
94
103
  end
@@ -98,36 +107,54 @@ module LlmCostTracker
98
107
  end
99
108
 
100
109
  def token_usage(usage:, cache_read:)
110
+ audio_input = audio_input_tokens(usage)
111
+ audio_output = audio_output_tokens(usage)
112
+
101
113
  TokenUsage.build(
102
- input_tokens: regular_input_tokens(usage: usage, cache_read: cache_read),
103
- output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
104
- total_tokens: total_tokens(usage: usage, cache_read: cache_read),
114
+ input_tokens: regular_input_tokens(usage: usage, cache_read: cache_read, audio_input: audio_input),
115
+ output_tokens: regular_output_tokens(usage: usage, audio_output: audio_output),
116
+ total_tokens: usage["total_tokens"],
105
117
  cache_read_input_tokens: cache_read,
118
+ audio_input_tokens: audio_input,
119
+ audio_output_tokens: audio_output,
106
120
  hidden_output_tokens: hidden_output_tokens(usage)
107
121
  )
108
122
  end
109
123
 
110
- def regular_input_tokens(usage:, cache_read:)
111
- [(usage["prompt_tokens"] || usage["input_tokens"]).to_i - cache_read.to_i, 0].max
124
+ def regular_input_tokens(usage:, cache_read:, audio_input:)
125
+ [(usage["prompt_tokens"] || usage["input_tokens"]).to_i - cache_read - audio_input, 0].max
126
+ end
127
+
128
+ def regular_output_tokens(usage:, audio_output:)
129
+ [(usage["completion_tokens"] || usage["output_tokens"]).to_i - audio_output, 0].max
112
130
  end
113
131
 
114
132
  def cache_read_input_tokens(usage)
115
- details = usage["prompt_tokens_details"] || usage["input_tokens_details"] || {}
116
- details["cached_tokens"]
133
+ details = input_token_details(usage)
134
+ details["cached_tokens"].to_i
135
+ end
136
+
137
+ def audio_input_tokens(usage)
138
+ details = input_token_details(usage)
139
+ details["audio_tokens"].to_i
117
140
  end
118
141
 
119
142
  def hidden_output_tokens(usage)
120
- details = usage["completion_tokens_details"] || usage["output_tokens_details"] || {}
121
- details["reasoning_tokens"]
143
+ details = output_token_details(usage)
144
+ details["reasoning_tokens"].to_i
122
145
  end
123
146
 
124
- def total_tokens(usage:, cache_read:)
125
- total = usage["total_tokens"]
126
- return total.to_i unless total.nil?
147
+ def audio_output_tokens(usage)
148
+ details = output_token_details(usage)
149
+ details["audio_tokens"].to_i
150
+ end
151
+
152
+ def input_token_details(usage)
153
+ usage["prompt_tokens_details"] || usage["input_tokens_details"] || usage["input_token_details"] || {}
154
+ end
127
155
 
128
- regular_input_tokens(usage: usage, cache_read: cache_read) +
129
- cache_read.to_i +
130
- (usage["completion_tokens"] || usage["output_tokens"]).to_i
156
+ def output_token_details(usage)
157
+ usage["completion_tokens_details"] || usage["output_tokens_details"] || usage["output_token_details"] || {}
131
158
  end
132
159
  end
133
160
  end
@@ -12,7 +12,7 @@ module LlmCostTracker
12
12
  def parse(body)
13
13
  return [] if body.blank?
14
14
 
15
- return parse_json_array(body) if body.lstrip.start_with?("[")
15
+ return parse_json_array(body) if body.match?(/\A\s*\[/)
16
16
 
17
17
  parse_event_stream(body)
18
18
  end
@@ -13,7 +13,7 @@ module LlmCostTracker
13
13
  def find_for_provider(provider)
14
14
  provider_name = provider.to_s.downcase
15
15
  BUILT_INS.find do |parser|
16
- Array(parser.provider_names).map { |name| name.to_s.downcase }.include?(provider_name)
16
+ parser.provider_names.include?(provider_name)
17
17
  end
18
18
  end
19
19
  end
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "metadata": {
3
- "updated_at": "2026-05-01",
3
+ "updated_at": "2026-05-02",
4
4
  "currency": "USD",
5
5
  "unit": "1M tokens",
6
6
  "source_urls": [
@@ -15,13 +15,23 @@
15
15
  "schema_version": 1,
16
16
  "min_gem_version": "0.4.0"
17
17
  },
18
+ "service_charges": {
19
+ "anthropic": {
20
+ "web_search_request": 10.0,
21
+ "code_execution_hour": 0.05
22
+ },
23
+ "openai": {
24
+ "web_search_request": 10.0,
25
+ "file_search_call": 2.5
26
+ }
27
+ },
18
28
  "models": {
19
29
  "anthropic/claude-haiku-4-5": {
20
30
  "input": 1.0,
21
31
  "output": 5.0,
22
32
  "cache_read_input": 0.1,
23
33
  "cache_write_input": 1.25,
24
- "cache_write_1h_input": 2.0,
34
+ "cache_write_extended_input": 2.0,
25
35
  "batch_input": 0.5,
26
36
  "batch_output": 2.5
27
37
  },
@@ -30,7 +40,7 @@
30
40
  "output": 75.0,
31
41
  "cache_read_input": 1.5,
32
42
  "cache_write_input": 18.75,
33
- "cache_write_1h_input": 30.0,
43
+ "cache_write_extended_input": 30.0,
34
44
  "batch_input": 7.5,
35
45
  "batch_output": 37.5
36
46
  },
@@ -39,7 +49,7 @@
39
49
  "output": 75.0,
40
50
  "cache_read_input": 1.5,
41
51
  "cache_write_input": 18.75,
42
- "cache_write_1h_input": 30.0,
52
+ "cache_write_extended_input": 30.0,
43
53
  "batch_input": 7.5,
44
54
  "batch_output": 37.5
45
55
  },
@@ -48,7 +58,7 @@
48
58
  "output": 25.0,
49
59
  "cache_read_input": 0.5,
50
60
  "cache_write_input": 6.25,
51
- "cache_write_1h_input": 10.0,
61
+ "cache_write_extended_input": 10.0,
52
62
  "batch_input": 2.5,
53
63
  "batch_output": 12.5
54
64
  },
@@ -57,24 +67,24 @@
57
67
  "output": 25.0,
58
68
  "cache_read_input": 0.5,
59
69
  "cache_write_input": 6.25,
60
- "cache_write_1h_input": 10.0,
70
+ "cache_write_extended_input": 10.0,
61
71
  "batch_input": 2.5,
62
72
  "batch_output": 12.5,
63
73
  "data_residency_input": 5.5,
64
74
  "data_residency_cache_write_input": 6.875,
65
- "data_residency_cache_write_1h_input": 11.0,
75
+ "data_residency_cache_write_extended_input": 11.0,
66
76
  "data_residency_cache_read_input": 0.55,
67
77
  "data_residency_output": 27.5,
68
78
  "data_residency_batch_input": 2.75,
69
79
  "data_residency_batch_output": 13.75,
70
80
  "fast_input": 30.0,
71
81
  "fast_cache_write_input": 37.5,
72
- "fast_cache_write_1h_input": 60.0,
82
+ "fast_cache_write_extended_input": 60.0,
73
83
  "fast_cache_read_input": 3.0,
74
84
  "fast_output": 150.0,
75
85
  "fast_data_residency_input": 33.0,
76
86
  "fast_data_residency_cache_write_input": 41.25,
77
- "fast_data_residency_cache_write_1h_input": 66.0,
87
+ "fast_data_residency_cache_write_extended_input": 66.0,
78
88
  "fast_data_residency_cache_read_input": 3.3,
79
89
  "fast_data_residency_output": 165.0
80
90
  },
@@ -83,12 +93,12 @@
83
93
  "output": 25.0,
84
94
  "cache_read_input": 0.5,
85
95
  "cache_write_input": 6.25,
86
- "cache_write_1h_input": 10.0,
96
+ "cache_write_extended_input": 10.0,
87
97
  "batch_input": 2.5,
88
98
  "batch_output": 12.5,
89
99
  "data_residency_input": 5.5,
90
100
  "data_residency_cache_write_input": 6.875,
91
- "data_residency_cache_write_1h_input": 11.0,
101
+ "data_residency_cache_write_extended_input": 11.0,
92
102
  "data_residency_cache_read_input": 0.55,
93
103
  "data_residency_output": 27.5,
94
104
  "data_residency_batch_input": 2.75,
@@ -99,7 +109,7 @@
99
109
  "output": 15.0,
100
110
  "cache_read_input": 0.3,
101
111
  "cache_write_input": 3.75,
102
- "cache_write_1h_input": 6.0,
112
+ "cache_write_extended_input": 6.0,
103
113
  "batch_input": 1.5,
104
114
  "batch_output": 7.5
105
115
  },
@@ -108,7 +118,7 @@
108
118
  "output": 15.0,
109
119
  "cache_read_input": 0.3,
110
120
  "cache_write_input": 3.75,
111
- "cache_write_1h_input": 6.0,
121
+ "cache_write_extended_input": 6.0,
112
122
  "batch_input": 1.5,
113
123
  "batch_output": 7.5
114
124
  },
@@ -117,12 +127,12 @@
117
127
  "output": 15.0,
118
128
  "cache_read_input": 0.3,
119
129
  "cache_write_input": 3.75,
120
- "cache_write_1h_input": 6.0,
130
+ "cache_write_extended_input": 6.0,
121
131
  "batch_input": 1.5,
122
132
  "batch_output": 7.5,
123
133
  "data_residency_input": 3.3,
124
134
  "data_residency_cache_write_input": 4.125,
125
- "data_residency_cache_write_1h_input": 6.6,
135
+ "data_residency_cache_write_extended_input": 6.6,
126
136
  "data_residency_cache_read_input": 0.33,
127
137
  "data_residency_output": 16.5,
128
138
  "data_residency_batch_input": 1.65,
@@ -134,7 +144,9 @@
134
144
  "output": 0.4,
135
145
  "batch_input": 0.05,
136
146
  "batch_output": 0.2,
137
- "batch_cache_read_input": 0.025
147
+ "batch_cache_read_input": 0.025,
148
+ "audio_input": 0.7,
149
+ "batch_audio_input": 0.35
138
150
  },
139
151
  "gemini/gemini-2.0-flash-lite": {
140
152
  "input": 0.075,
@@ -154,7 +166,11 @@
154
166
  "flex_cache_read_input": 0.03,
155
167
  "priority_input": 0.54,
156
168
  "priority_output": 4.5,
157
- "priority_cache_read_input": 0.054
169
+ "priority_cache_read_input": 0.054,
170
+ "audio_input": 1.0,
171
+ "batch_audio_input": 0.5,
172
+ "flex_audio_input": 0.5,
173
+ "priority_audio_input": 1.8
158
174
  },
159
175
  "gemini/gemini-2.5-flash-lite": {
160
176
  "input": 0.1,
@@ -168,7 +184,11 @@
168
184
  "flex_cache_read_input": 0.01,
169
185
  "priority_input": 0.18,
170
186
  "priority_output": 0.72,
171
- "priority_cache_read_input": 0.018
187
+ "priority_cache_read_input": 0.018,
188
+ "audio_input": 0.3,
189
+ "batch_audio_input": 0.15,
190
+ "flex_audio_input": 0.15,
191
+ "priority_audio_input": 0.54
172
192
  },
173
193
  "gemini/gemini-2.5-pro": {
174
194
  "input": 1.25,
@@ -309,6 +329,71 @@
309
329
  "priority_output": 1.0,
310
330
  "priority_cache_read_input": 0.125
311
331
  },
332
+ "openai/gpt-4o-realtime-preview": {
333
+ "input": 5.0,
334
+ "cache_read_input": 2.5,
335
+ "audio_input": 40.0,
336
+ "output": 20.0,
337
+ "audio_output": 80.0
338
+ },
339
+ "openai/gpt-4o-mini-realtime-preview": {
340
+ "input": 0.6,
341
+ "cache_read_input": 0.3,
342
+ "audio_input": 10.0,
343
+ "output": 2.4,
344
+ "audio_output": 20.0
345
+ },
346
+ "openai/gpt-realtime": {
347
+ "input": 4.0,
348
+ "cache_read_input": 0.4,
349
+ "audio_input": 32.0,
350
+ "output": 16.0,
351
+ "audio_output": 64.0
352
+ },
353
+ "openai/gpt-realtime-1.5": {
354
+ "input": 4.0,
355
+ "cache_read_input": 0.4,
356
+ "audio_input": 32.0,
357
+ "output": 16.0,
358
+ "audio_output": 64.0
359
+ },
360
+ "openai/gpt-realtime-mini": {
361
+ "input": 0.6,
362
+ "cache_read_input": 0.06,
363
+ "audio_input": 10.0,
364
+ "output": 2.4,
365
+ "audio_output": 20.0
366
+ },
367
+ "openai/gpt-audio-1.5": {
368
+ "input": 2.5,
369
+ "audio_input": 32.0,
370
+ "output": 10.0,
371
+ "audio_output": 64.0
372
+ },
373
+ "openai/gpt-audio-mini": {
374
+ "input": 0.6,
375
+ "audio_input": 10.0,
376
+ "output": 2.4,
377
+ "audio_output": 20.0
378
+ },
379
+ "openai/gpt-audio": {
380
+ "input": 2.5,
381
+ "audio_input": 32.0,
382
+ "output": 10.0,
383
+ "audio_output": 64.0
384
+ },
385
+ "openai/gpt-4o-audio-preview": {
386
+ "input": 2.5,
387
+ "audio_input": 40.0,
388
+ "output": 10.0,
389
+ "audio_output": 80.0
390
+ },
391
+ "openai/gpt-4o-mini-audio-preview": {
392
+ "input": 0.15,
393
+ "audio_input": 10.0,
394
+ "output": 0.6,
395
+ "audio_output": 20.0
396
+ },
312
397
  "openai/gpt-5": {
313
398
  "input": 1.25,
314
399
  "output": 10.0,
@@ -672,7 +757,7 @@
672
757
  "anthropic/claude-haiku-3-5": {
673
758
  "input": 0.8,
674
759
  "cache_write_input": 1.0,
675
- "cache_write_1h_input": 1.6,
760
+ "cache_write_extended_input": 1.6,
676
761
  "cache_read_input": 0.08,
677
762
  "output": 4.0,
678
763
  "batch_input": 0.4,
@@ -681,7 +766,7 @@
681
766
  "anthropic/claude-haiku-3": {
682
767
  "input": 0.25,
683
768
  "cache_write_input": 0.3,
684
- "cache_write_1h_input": 0.5,
769
+ "cache_write_extended_input": 0.5,
685
770
  "cache_read_input": 0.03,
686
771
  "output": 1.25,
687
772
  "batch_input": 0.125,
@@ -1,18 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "components"
3
+ require_relative "../billing/components"
4
4
 
5
5
  module LlmCostTracker
6
6
  module Pricing
7
7
  module EffectivePrices
8
8
  class << self
9
9
  def call(usage:, prices:, pricing_mode:)
10
- quantities = usage.price_quantities
11
10
  context_tier = context_tier?(usage: usage, prices: prices)
12
11
 
13
- Pricing::COMPONENTS.to_h do |component|
14
- price_key = component.price_key
15
- tokens = quantities.fetch(price_key)
12
+ Billing::Components::TOKEN_PRICED.to_h do |component|
13
+ price_key = component.key
14
+ tokens = usage.public_send(component.token_key)
16
15
  price = if tokens.positive?
17
16
  price_for(
18
17
  prices: prices,
@@ -30,29 +29,67 @@ module LlmCostTracker
30
29
  private
31
30
 
32
31
  def price_for(prices:, key:, pricing_mode:, context_tier:)
33
- mode = Pricing.normalize_mode(pricing_mode)
34
- return contextual_price(prices: prices, key: key, context_tier: context_tier) unless mode
32
+ return contextual_price(prices: prices, key: key, context_tier: context_tier) unless pricing_mode
35
33
 
36
- contextual_price(prices: prices, key: :"#{mode}_#{key}", context_tier: context_tier) ||
37
- derived_mode_price(prices: prices, key: key, mode: mode, context_tier: context_tier)
34
+ orderings = mode_orderings_for(pricing_mode)
35
+ orderings.each do |mode|
36
+ direct = contextual_price(prices: prices, key: :"#{mode}_#{key}", context_tier: context_tier)
37
+ return direct if direct
38
+ end
39
+ return nil if %i[input output].include?(key)
40
+
41
+ derived_mode_price(prices: prices, key: key, modes: orderings, context_tier: context_tier)
38
42
  end
39
43
 
44
+ def mode_orderings_for(pricing_mode)
45
+ mode_string = pricing_mode.to_s
46
+ return [mode_string] unless mode_string.include?("_")
47
+
48
+ tokens = tokenize_mode(mode_string)
49
+ return [mode_string] if tokens.size <= 1
50
+
51
+ [mode_string, *tokens.permutation.map { |permutation| permutation.join("_") }].uniq
52
+ end
53
+
54
+ def tokenize_mode(mode_string)
55
+ remaining = mode_string.dup
56
+ tokens = []
57
+ loop do
58
+ break if remaining.empty?
59
+
60
+ compound = COMPOUND_MODE_TOKENS.find { |token| remaining == token || remaining.start_with?("#{token}_") }
61
+ if compound
62
+ tokens << compound
63
+ remaining = remaining.delete_prefix(compound).delete_prefix("_")
64
+ else
65
+ first, _, rest = remaining.partition("_")
66
+ tokens << first
67
+ remaining = rest
68
+ end
69
+ end
70
+ tokens
71
+ end
72
+
73
+ COMPOUND_MODE_TOKENS = %w[data_residency].freeze
74
+ private_constant :COMPOUND_MODE_TOKENS
75
+
40
76
  def contextual_price(prices:, key:, context_tier:)
41
77
  return prices[key] unless context_tier
42
78
 
43
79
  prices[:"above_context_#{key}"]
44
80
  end
45
81
 
46
- def derived_mode_price(prices:, key:, mode:, context_tier:)
82
+ def derived_mode_price(prices:, key:, modes:, context_tier:)
47
83
  standard_price = contextual_price(prices: prices, key: key, context_tier: context_tier)
48
- return nil unless standard_price
49
-
50
- base_key = key == :output ? :output : :input
51
- base_price = contextual_price(prices: prices, key: base_key, context_tier: context_tier)
52
- mode_base_price = contextual_price(prices: prices, key: :"#{mode}_#{base_key}", context_tier: context_tier)
53
- return nil unless base_price && mode_base_price
84
+ base_price = contextual_price(prices: prices, key: :input, context_tier: context_tier)
85
+ return nil unless standard_price && base_price
86
+ return nil if base_price.zero?
54
87
 
55
- standard_price * (mode_base_price.to_f / base_price)
88
+ modes.each do |mode|
89
+ mode_base_price = contextual_price(prices: prices, key: :"#{mode}_input", context_tier: context_tier)
90
+ return standard_price * (mode_base_price / base_price) if mode_base_price
91
+ end
92
+ nil
56
93
  end
57
94
 
58
95
  def context_tier?(usage:, prices:)
@@ -62,8 +99,9 @@ module LlmCostTracker
62
99
  input_tokens = usage.input_tokens +
63
100
  usage.cache_read_input_tokens +
64
101
  usage.cache_write_input_tokens +
65
- usage.cache_write_1h_input_tokens
66
- input_tokens > threshold.to_i
102
+ usage.cache_write_extended_input_tokens +
103
+ usage.audio_input_tokens
104
+ input_tokens > threshold
67
105
  end
68
106
  end
69
107
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "../token_usage"
3
4
  require_relative "effective_prices"
4
5
 
5
6
  module LlmCostTracker
@@ -33,7 +34,7 @@ module LlmCostTracker
33
34
 
34
35
  module Explainer
35
36
  class << self
36
- def call(provider:, model:, token_usage:, pricing_mode: nil)
37
+ def call(provider:, model:, tokens:, pricing_mode: nil)
37
38
  match = Lookup.call(provider: provider, model: model)
38
39
 
39
40
  explanation(
@@ -41,7 +42,7 @@ module LlmCostTracker
41
42
  model: model,
42
43
  pricing_mode: pricing_mode,
43
44
  match: match,
44
- usage: token_usage
45
+ usage: TokenUsage.build_from_tokens(tokens)
45
46
  )
46
47
  end
47
48
 
@@ -50,9 +51,7 @@ module LlmCostTracker
50
51
  def explanation(provider:, model:, pricing_mode:, match:, usage:)
51
52
  prices = match&.prices
52
53
  pricing_mode = Pricing.normalize_mode(pricing_mode)
53
- effective = if prices && usage
54
- EffectivePrices.call(usage: usage, prices: prices, pricing_mode: pricing_mode)
55
- end
54
+ effective = EffectivePrices.call(usage: usage, prices: prices, pricing_mode: pricing_mode) if prices
56
55
 
57
56
  Explanation.new(
58
57
  provider: provider.to_s,