llm_cost_tracker 0.7.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -0
- data/CHANGELOG.md +72 -1
- data/README.md +58 -221
- data/app/assets/llm_cost_tracker/application.css +218 -41
- data/app/controllers/llm_cost_tracker/application_controller.rb +30 -17
- data/app/controllers/llm_cost_tracker/assets_controller.rb +11 -1
- data/app/controllers/llm_cost_tracker/calls_controller.rb +19 -14
- data/app/controllers/llm_cost_tracker/data_quality_controller.rb +10 -2
- data/app/helpers/llm_cost_tracker/application_helper.rb +11 -24
- data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +3 -21
- data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +4 -4
- data/app/helpers/llm_cost_tracker/dashboard_query_helper.rb +1 -1
- data/app/helpers/llm_cost_tracker/token_usage_helper.rb +20 -7
- data/app/models/llm_cost_tracker/call.rb +169 -0
- data/app/models/llm_cost_tracker/call_line_item.rb +22 -0
- data/app/models/llm_cost_tracker/call_rollup.rb +9 -0
- data/app/models/llm_cost_tracker/call_tag.rb +16 -0
- data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +13 -0
- data/app/models/llm_cost_tracker/ingestion/lease.rb +1 -1
- data/app/models/llm_cost_tracker/provider_invoice.rb +9 -0
- data/app/services/llm_cost_tracker/dashboard/data_quality.rb +125 -34
- data/app/services/llm_cost_tracker/dashboard/date_range.rb +1 -1
- data/app/services/llm_cost_tracker/dashboard/filter.rb +2 -2
- data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +74 -21
- data/app/services/llm_cost_tracker/dashboard/pagination.rb +6 -4
- data/app/services/llm_cost_tracker/dashboard/params.rb +8 -2
- data/app/services/llm_cost_tracker/dashboard/provider_breakdown.rb +1 -1
- data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +4 -3
- data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +42 -9
- data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +14 -37
- data/app/services/llm_cost_tracker/dashboard/time_series.rb +1 -1
- data/app/services/llm_cost_tracker/dashboard/top_models.rb +1 -1
- data/app/views/llm_cost_tracker/calls/index.html.erb +33 -75
- data/app/views/llm_cost_tracker/calls/show.html.erb +62 -7
- data/app/views/llm_cost_tracker/dashboard/index.html.erb +9 -50
- data/app/views/llm_cost_tracker/data_quality/index.html.erb +103 -126
- data/app/views/llm_cost_tracker/errors/database.html.erb +1 -1
- data/app/views/llm_cost_tracker/models/index.html.erb +18 -50
- data/app/views/llm_cost_tracker/shared/_filters.html.erb +63 -0
- data/app/views/llm_cost_tracker/shared/_sort.html.erb +13 -0
- data/app/views/llm_cost_tracker/shared/setup_required.html.erb +1 -1
- data/app/views/llm_cost_tracker/tags/index.html.erb +3 -34
- data/app/views/llm_cost_tracker/tags/show.html.erb +5 -37
- data/lib/llm_cost_tracker/billing/components.rb +53 -0
- data/lib/llm_cost_tracker/billing/components.yml +117 -0
- data/lib/llm_cost_tracker/billing/cost_status.rb +45 -0
- data/lib/llm_cost_tracker/billing/line_item.rb +189 -0
- data/lib/llm_cost_tracker/budget.rb +23 -35
- data/lib/llm_cost_tracker/capture/stream_collector.rb +47 -33
- data/lib/llm_cost_tracker/configuration.rb +36 -19
- data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +54 -0
- data/lib/llm_cost_tracker/doctor/ingestion_check.rb +24 -32
- data/lib/llm_cost_tracker/doctor/legacy_audit_check.rb +36 -0
- data/lib/llm_cost_tracker/doctor/legacy_billing_status_check.rb +22 -0
- data/lib/llm_cost_tracker/doctor/price_check.rb +2 -2
- data/lib/llm_cost_tracker/doctor/pricing_snapshot_drift_check.rb +85 -0
- data/lib/llm_cost_tracker/doctor/probe.rb +17 -0
- data/lib/llm_cost_tracker/doctor/schema_check.rb +31 -0
- data/lib/llm_cost_tracker/doctor.rb +43 -45
- data/lib/llm_cost_tracker/errors.rb +5 -19
- data/lib/llm_cost_tracker/event.rb +10 -2
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +4 -2
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +2 -6
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +157 -0
- data/lib/llm_cost_tracker/ingestion/batch.rb +11 -12
- data/lib/llm_cost_tracker/ingestion/inbox.rb +39 -23
- data/lib/llm_cost_tracker/ingestion/worker.rb +14 -5
- data/lib/llm_cost_tracker/ingestion.rb +28 -22
- data/lib/llm_cost_tracker/integrations/anthropic.rb +45 -38
- data/lib/llm_cost_tracker/integrations/base.rb +36 -29
- data/lib/llm_cost_tracker/integrations/openai.rb +85 -40
- data/lib/llm_cost_tracker/integrations/ruby_llm.rb +5 -5
- data/lib/llm_cost_tracker/integrations.rb +2 -2
- data/lib/llm_cost_tracker/ledger/period/totals.rb +12 -9
- data/lib/llm_cost_tracker/ledger/period.rb +5 -5
- data/lib/llm_cost_tracker/ledger/rollups/upsert_sql.rb +4 -10
- data/lib/llm_cost_tracker/ledger/rollups.rb +76 -25
- data/lib/llm_cost_tracker/ledger/schema/adapter.rb +18 -0
- data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +50 -0
- data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +37 -0
- data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +26 -0
- data/lib/llm_cost_tracker/ledger/schema/calls.rb +34 -23
- data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +57 -0
- data/lib/llm_cost_tracker/ledger/store.rb +110 -18
- data/lib/llm_cost_tracker/ledger/tags/query.rb +5 -11
- data/lib/llm_cost_tracker/ledger/tags/sql.rb +27 -14
- data/lib/llm_cost_tracker/ledger.rb +4 -2
- data/lib/llm_cost_tracker/logging.rb +2 -5
- data/lib/llm_cost_tracker/middleware/faraday.rb +7 -6
- data/lib/llm_cost_tracker/parsers/anthropic.rb +52 -7
- data/lib/llm_cost_tracker/parsers/base.rb +8 -3
- data/lib/llm_cost_tracker/parsers/gemini.rb +101 -15
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +10 -2
- data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +87 -0
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +48 -21
- data/lib/llm_cost_tracker/parsers/sse.rb +1 -1
- data/lib/llm_cost_tracker/parsers.rb +1 -1
- data/lib/llm_cost_tracker/prices.json +105 -20
- data/lib/llm_cost_tracker/pricing/effective_prices.rb +57 -19
- data/lib/llm_cost_tracker/pricing/explainer.rb +4 -5
- data/lib/llm_cost_tracker/pricing/lookup.rb +38 -34
- data/lib/llm_cost_tracker/pricing/registry.rb +65 -45
- data/lib/llm_cost_tracker/pricing/service_charges.rb +204 -0
- data/lib/llm_cost_tracker/pricing/sync/fetcher.rb +26 -17
- data/lib/llm_cost_tracker/pricing/sync/registry_diff.rb +6 -15
- data/lib/llm_cost_tracker/pricing/sync.rb +57 -10
- data/lib/llm_cost_tracker/pricing/sync_change_printer.rb +32 -0
- data/lib/llm_cost_tracker/pricing.rb +190 -26
- data/lib/llm_cost_tracker/railtie.rb +0 -8
- data/lib/llm_cost_tracker/report/data.rb +16 -8
- data/lib/llm_cost_tracker/report.rb +0 -4
- data/lib/llm_cost_tracker/retention.rb +8 -8
- data/lib/llm_cost_tracker/tags/context.rb +2 -4
- data/lib/llm_cost_tracker/tags/key.rb +4 -0
- data/lib/llm_cost_tracker/tags/sanitizer.rb +12 -17
- data/lib/llm_cost_tracker/timing.rb +15 -0
- data/lib/llm_cost_tracker/token_usage.rb +56 -42
- data/lib/llm_cost_tracker/tracker.rb +67 -24
- data/lib/llm_cost_tracker/usage_capture.rb +29 -8
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +36 -35
- data/lib/tasks/llm_cost_tracker.rake +22 -17
- metadata +36 -41
- data/app/models/llm_cost_tracker/ingestion/event.rb +0 -13
- data/app/models/llm_cost_tracker/ledger/call.rb +0 -45
- data/app/models/llm_cost_tracker/ledger/call_metrics.rb +0 -66
- data/app/models/llm_cost_tracker/ledger/period/grouping.rb +0 -71
- data/app/models/llm_cost_tracker/ledger/period/total.rb +0 -13
- data/app/models/llm_cost_tracker/ledger/tags/accessors.rb +0 -19
- data/lib/llm_cost_tracker/configuration/instrumentation.rb +0 -33
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_ingestion_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_period_totals_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_provider_response_id_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_streaming_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_token_usage_generator.rb +0 -42
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_ingestion_to_llm_cost_tracker.rb.erb +0 -33
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +0 -9
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +0 -104
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_provider_response_id_to_llm_api_calls.rb.erb +0 -15
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_streaming_to_llm_api_calls.rb.erb +0 -21
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_token_usage_to_llm_api_calls.rb.erb +0 -22
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +0 -83
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +0 -26
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +0 -44
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +0 -29
- data/lib/llm_cost_tracker/ledger/rollups/batch.rb +0 -43
- data/lib/llm_cost_tracker/ledger/schema/period_totals.rb +0 -32
- data/lib/llm_cost_tracker/pricing/components.rb +0 -37
- data/lib/llm_cost_tracker/pricing/sync/registry_loader.rb +0 -63
|
@@ -1,8 +1,14 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "openai_service_charges"
|
|
4
|
+
|
|
3
5
|
module LlmCostTracker
|
|
4
6
|
module Parsers
|
|
5
7
|
module OpenaiUsage
|
|
8
|
+
include OpenaiServiceCharges
|
|
9
|
+
|
|
10
|
+
OPENAI_DATA_RESIDENCY_HOST_PATTERN = /\A[a-z]{2,3}\.api\.openai\.com\z/
|
|
11
|
+
|
|
6
12
|
private
|
|
7
13
|
|
|
8
14
|
def parse_openai_usage(request_url:, request_body:, response_status:, response_body:)
|
|
@@ -27,7 +33,8 @@ module LlmCostTracker
|
|
|
27
33
|
),
|
|
28
34
|
model: model,
|
|
29
35
|
token_usage: token_usage(usage: usage, cache_read: cache_read),
|
|
30
|
-
usage_source: :response
|
|
36
|
+
usage_source: :response,
|
|
37
|
+
service_line_items: openai_service_line_items(response)
|
|
31
38
|
)
|
|
32
39
|
end
|
|
33
40
|
|
|
@@ -35,8 +42,7 @@ module LlmCostTracker
|
|
|
35
42
|
return nil unless response_status == 200
|
|
36
43
|
|
|
37
44
|
request = safe_json_parse(request_body)
|
|
38
|
-
model =
|
|
39
|
-
find_event_value(events) { |data| data["model"] || data.dig("response", "model") } || request["model"]
|
|
45
|
+
model = find_event_value(events) { |data| data["model"] || data.dig("response", "model") } || request["model"]
|
|
40
46
|
usage = detect_stream_usage(events)
|
|
41
47
|
response_id = find_event_value(events) { |data| data["id"] || data.dig("response", "id") }
|
|
42
48
|
pricing_mode = pricing_mode(
|
|
@@ -44,6 +50,7 @@ module LlmCostTracker
|
|
|
44
50
|
model: model,
|
|
45
51
|
service_tier: stream_pricing_mode(events) || request["service_tier"]
|
|
46
52
|
)
|
|
53
|
+
service_line_items = openai_stream_service_line_items(events)
|
|
47
54
|
|
|
48
55
|
if usage
|
|
49
56
|
cache_read = cache_read_input_tokens(usage)
|
|
@@ -54,14 +61,16 @@ module LlmCostTracker
|
|
|
54
61
|
model: model,
|
|
55
62
|
token_usage: token_usage(usage: usage, cache_read: cache_read),
|
|
56
63
|
stream: true,
|
|
57
|
-
usage_source: :stream_final
|
|
64
|
+
usage_source: :stream_final,
|
|
65
|
+
service_line_items: service_line_items
|
|
58
66
|
)
|
|
59
67
|
else
|
|
60
68
|
build_unknown_stream_usage(
|
|
61
69
|
provider: provider_for(request_url),
|
|
62
70
|
model: model,
|
|
63
71
|
provider_response_id: response_id,
|
|
64
|
-
pricing_mode: pricing_mode
|
|
72
|
+
pricing_mode: pricing_mode,
|
|
73
|
+
service_line_items: service_line_items
|
|
65
74
|
)
|
|
66
75
|
end
|
|
67
76
|
end
|
|
@@ -88,7 +97,7 @@ module LlmCostTracker
|
|
|
88
97
|
|
|
89
98
|
def openai_regional_processing?(request_url:, model:)
|
|
90
99
|
uri = parsed_uri(request_url)
|
|
91
|
-
return false unless
|
|
100
|
+
return false unless uri&.host.to_s.downcase.match?(OPENAI_DATA_RESIDENCY_HOST_PATTERN)
|
|
92
101
|
|
|
93
102
|
openai_data_residency_model?(model)
|
|
94
103
|
end
|
|
@@ -98,36 +107,54 @@ module LlmCostTracker
|
|
|
98
107
|
end
|
|
99
108
|
|
|
100
109
|
def token_usage(usage:, cache_read:)
|
|
110
|
+
audio_input = audio_input_tokens(usage)
|
|
111
|
+
audio_output = audio_output_tokens(usage)
|
|
112
|
+
|
|
101
113
|
TokenUsage.build(
|
|
102
|
-
input_tokens: regular_input_tokens(usage: usage, cache_read: cache_read),
|
|
103
|
-
output_tokens: (usage
|
|
104
|
-
total_tokens: total_tokens
|
|
114
|
+
input_tokens: regular_input_tokens(usage: usage, cache_read: cache_read, audio_input: audio_input),
|
|
115
|
+
output_tokens: regular_output_tokens(usage: usage, audio_output: audio_output),
|
|
116
|
+
total_tokens: usage["total_tokens"],
|
|
105
117
|
cache_read_input_tokens: cache_read,
|
|
118
|
+
audio_input_tokens: audio_input,
|
|
119
|
+
audio_output_tokens: audio_output,
|
|
106
120
|
hidden_output_tokens: hidden_output_tokens(usage)
|
|
107
121
|
)
|
|
108
122
|
end
|
|
109
123
|
|
|
110
|
-
def regular_input_tokens(usage:, cache_read:)
|
|
111
|
-
[(usage["prompt_tokens"] || usage["input_tokens"]).to_i - cache_read
|
|
124
|
+
def regular_input_tokens(usage:, cache_read:, audio_input:)
|
|
125
|
+
[(usage["prompt_tokens"] || usage["input_tokens"]).to_i - cache_read - audio_input, 0].max
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def regular_output_tokens(usage:, audio_output:)
|
|
129
|
+
[(usage["completion_tokens"] || usage["output_tokens"]).to_i - audio_output, 0].max
|
|
112
130
|
end
|
|
113
131
|
|
|
114
132
|
def cache_read_input_tokens(usage)
|
|
115
|
-
details = usage
|
|
116
|
-
details["cached_tokens"]
|
|
133
|
+
details = input_token_details(usage)
|
|
134
|
+
details["cached_tokens"].to_i
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
def audio_input_tokens(usage)
|
|
138
|
+
details = input_token_details(usage)
|
|
139
|
+
details["audio_tokens"].to_i
|
|
117
140
|
end
|
|
118
141
|
|
|
119
142
|
def hidden_output_tokens(usage)
|
|
120
|
-
details = usage
|
|
121
|
-
details["reasoning_tokens"]
|
|
143
|
+
details = output_token_details(usage)
|
|
144
|
+
details["reasoning_tokens"].to_i
|
|
122
145
|
end
|
|
123
146
|
|
|
124
|
-
def
|
|
125
|
-
|
|
126
|
-
|
|
147
|
+
def audio_output_tokens(usage)
|
|
148
|
+
details = output_token_details(usage)
|
|
149
|
+
details["audio_tokens"].to_i
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def input_token_details(usage)
|
|
153
|
+
usage["prompt_tokens_details"] || usage["input_tokens_details"] || usage["input_token_details"] || {}
|
|
154
|
+
end
|
|
127
155
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
(usage["completion_tokens"] || usage["output_tokens"]).to_i
|
|
156
|
+
def output_token_details(usage)
|
|
157
|
+
usage["completion_tokens_details"] || usage["output_tokens_details"] || usage["output_token_details"] || {}
|
|
131
158
|
end
|
|
132
159
|
end
|
|
133
160
|
end
|
|
@@ -13,7 +13,7 @@ module LlmCostTracker
|
|
|
13
13
|
def find_for_provider(provider)
|
|
14
14
|
provider_name = provider.to_s.downcase
|
|
15
15
|
BUILT_INS.find do |parser|
|
|
16
|
-
|
|
16
|
+
parser.provider_names.include?(provider_name)
|
|
17
17
|
end
|
|
18
18
|
end
|
|
19
19
|
end
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"metadata": {
|
|
3
|
-
"updated_at": "2026-05-
|
|
3
|
+
"updated_at": "2026-05-02",
|
|
4
4
|
"currency": "USD",
|
|
5
5
|
"unit": "1M tokens",
|
|
6
6
|
"source_urls": [
|
|
@@ -15,13 +15,23 @@
|
|
|
15
15
|
"schema_version": 1,
|
|
16
16
|
"min_gem_version": "0.4.0"
|
|
17
17
|
},
|
|
18
|
+
"service_charges": {
|
|
19
|
+
"anthropic": {
|
|
20
|
+
"web_search_request": 10.0,
|
|
21
|
+
"code_execution_hour": 0.05
|
|
22
|
+
},
|
|
23
|
+
"openai": {
|
|
24
|
+
"web_search_request": 10.0,
|
|
25
|
+
"file_search_call": 2.5
|
|
26
|
+
}
|
|
27
|
+
},
|
|
18
28
|
"models": {
|
|
19
29
|
"anthropic/claude-haiku-4-5": {
|
|
20
30
|
"input": 1.0,
|
|
21
31
|
"output": 5.0,
|
|
22
32
|
"cache_read_input": 0.1,
|
|
23
33
|
"cache_write_input": 1.25,
|
|
24
|
-
"
|
|
34
|
+
"cache_write_extended_input": 2.0,
|
|
25
35
|
"batch_input": 0.5,
|
|
26
36
|
"batch_output": 2.5
|
|
27
37
|
},
|
|
@@ -30,7 +40,7 @@
|
|
|
30
40
|
"output": 75.0,
|
|
31
41
|
"cache_read_input": 1.5,
|
|
32
42
|
"cache_write_input": 18.75,
|
|
33
|
-
"
|
|
43
|
+
"cache_write_extended_input": 30.0,
|
|
34
44
|
"batch_input": 7.5,
|
|
35
45
|
"batch_output": 37.5
|
|
36
46
|
},
|
|
@@ -39,7 +49,7 @@
|
|
|
39
49
|
"output": 75.0,
|
|
40
50
|
"cache_read_input": 1.5,
|
|
41
51
|
"cache_write_input": 18.75,
|
|
42
|
-
"
|
|
52
|
+
"cache_write_extended_input": 30.0,
|
|
43
53
|
"batch_input": 7.5,
|
|
44
54
|
"batch_output": 37.5
|
|
45
55
|
},
|
|
@@ -48,7 +58,7 @@
|
|
|
48
58
|
"output": 25.0,
|
|
49
59
|
"cache_read_input": 0.5,
|
|
50
60
|
"cache_write_input": 6.25,
|
|
51
|
-
"
|
|
61
|
+
"cache_write_extended_input": 10.0,
|
|
52
62
|
"batch_input": 2.5,
|
|
53
63
|
"batch_output": 12.5
|
|
54
64
|
},
|
|
@@ -57,24 +67,24 @@
|
|
|
57
67
|
"output": 25.0,
|
|
58
68
|
"cache_read_input": 0.5,
|
|
59
69
|
"cache_write_input": 6.25,
|
|
60
|
-
"
|
|
70
|
+
"cache_write_extended_input": 10.0,
|
|
61
71
|
"batch_input": 2.5,
|
|
62
72
|
"batch_output": 12.5,
|
|
63
73
|
"data_residency_input": 5.5,
|
|
64
74
|
"data_residency_cache_write_input": 6.875,
|
|
65
|
-
"
|
|
75
|
+
"data_residency_cache_write_extended_input": 11.0,
|
|
66
76
|
"data_residency_cache_read_input": 0.55,
|
|
67
77
|
"data_residency_output": 27.5,
|
|
68
78
|
"data_residency_batch_input": 2.75,
|
|
69
79
|
"data_residency_batch_output": 13.75,
|
|
70
80
|
"fast_input": 30.0,
|
|
71
81
|
"fast_cache_write_input": 37.5,
|
|
72
|
-
"
|
|
82
|
+
"fast_cache_write_extended_input": 60.0,
|
|
73
83
|
"fast_cache_read_input": 3.0,
|
|
74
84
|
"fast_output": 150.0,
|
|
75
85
|
"fast_data_residency_input": 33.0,
|
|
76
86
|
"fast_data_residency_cache_write_input": 41.25,
|
|
77
|
-
"
|
|
87
|
+
"fast_data_residency_cache_write_extended_input": 66.0,
|
|
78
88
|
"fast_data_residency_cache_read_input": 3.3,
|
|
79
89
|
"fast_data_residency_output": 165.0
|
|
80
90
|
},
|
|
@@ -83,12 +93,12 @@
|
|
|
83
93
|
"output": 25.0,
|
|
84
94
|
"cache_read_input": 0.5,
|
|
85
95
|
"cache_write_input": 6.25,
|
|
86
|
-
"
|
|
96
|
+
"cache_write_extended_input": 10.0,
|
|
87
97
|
"batch_input": 2.5,
|
|
88
98
|
"batch_output": 12.5,
|
|
89
99
|
"data_residency_input": 5.5,
|
|
90
100
|
"data_residency_cache_write_input": 6.875,
|
|
91
|
-
"
|
|
101
|
+
"data_residency_cache_write_extended_input": 11.0,
|
|
92
102
|
"data_residency_cache_read_input": 0.55,
|
|
93
103
|
"data_residency_output": 27.5,
|
|
94
104
|
"data_residency_batch_input": 2.75,
|
|
@@ -99,7 +109,7 @@
|
|
|
99
109
|
"output": 15.0,
|
|
100
110
|
"cache_read_input": 0.3,
|
|
101
111
|
"cache_write_input": 3.75,
|
|
102
|
-
"
|
|
112
|
+
"cache_write_extended_input": 6.0,
|
|
103
113
|
"batch_input": 1.5,
|
|
104
114
|
"batch_output": 7.5
|
|
105
115
|
},
|
|
@@ -108,7 +118,7 @@
|
|
|
108
118
|
"output": 15.0,
|
|
109
119
|
"cache_read_input": 0.3,
|
|
110
120
|
"cache_write_input": 3.75,
|
|
111
|
-
"
|
|
121
|
+
"cache_write_extended_input": 6.0,
|
|
112
122
|
"batch_input": 1.5,
|
|
113
123
|
"batch_output": 7.5
|
|
114
124
|
},
|
|
@@ -117,12 +127,12 @@
|
|
|
117
127
|
"output": 15.0,
|
|
118
128
|
"cache_read_input": 0.3,
|
|
119
129
|
"cache_write_input": 3.75,
|
|
120
|
-
"
|
|
130
|
+
"cache_write_extended_input": 6.0,
|
|
121
131
|
"batch_input": 1.5,
|
|
122
132
|
"batch_output": 7.5,
|
|
123
133
|
"data_residency_input": 3.3,
|
|
124
134
|
"data_residency_cache_write_input": 4.125,
|
|
125
|
-
"
|
|
135
|
+
"data_residency_cache_write_extended_input": 6.6,
|
|
126
136
|
"data_residency_cache_read_input": 0.33,
|
|
127
137
|
"data_residency_output": 16.5,
|
|
128
138
|
"data_residency_batch_input": 1.65,
|
|
@@ -134,7 +144,9 @@
|
|
|
134
144
|
"output": 0.4,
|
|
135
145
|
"batch_input": 0.05,
|
|
136
146
|
"batch_output": 0.2,
|
|
137
|
-
"batch_cache_read_input": 0.025
|
|
147
|
+
"batch_cache_read_input": 0.025,
|
|
148
|
+
"audio_input": 0.7,
|
|
149
|
+
"batch_audio_input": 0.35
|
|
138
150
|
},
|
|
139
151
|
"gemini/gemini-2.0-flash-lite": {
|
|
140
152
|
"input": 0.075,
|
|
@@ -154,7 +166,11 @@
|
|
|
154
166
|
"flex_cache_read_input": 0.03,
|
|
155
167
|
"priority_input": 0.54,
|
|
156
168
|
"priority_output": 4.5,
|
|
157
|
-
"priority_cache_read_input": 0.054
|
|
169
|
+
"priority_cache_read_input": 0.054,
|
|
170
|
+
"audio_input": 1.0,
|
|
171
|
+
"batch_audio_input": 0.5,
|
|
172
|
+
"flex_audio_input": 0.5,
|
|
173
|
+
"priority_audio_input": 1.8
|
|
158
174
|
},
|
|
159
175
|
"gemini/gemini-2.5-flash-lite": {
|
|
160
176
|
"input": 0.1,
|
|
@@ -168,7 +184,11 @@
|
|
|
168
184
|
"flex_cache_read_input": 0.01,
|
|
169
185
|
"priority_input": 0.18,
|
|
170
186
|
"priority_output": 0.72,
|
|
171
|
-
"priority_cache_read_input": 0.018
|
|
187
|
+
"priority_cache_read_input": 0.018,
|
|
188
|
+
"audio_input": 0.3,
|
|
189
|
+
"batch_audio_input": 0.15,
|
|
190
|
+
"flex_audio_input": 0.15,
|
|
191
|
+
"priority_audio_input": 0.54
|
|
172
192
|
},
|
|
173
193
|
"gemini/gemini-2.5-pro": {
|
|
174
194
|
"input": 1.25,
|
|
@@ -309,6 +329,71 @@
|
|
|
309
329
|
"priority_output": 1.0,
|
|
310
330
|
"priority_cache_read_input": 0.125
|
|
311
331
|
},
|
|
332
|
+
"openai/gpt-4o-realtime-preview": {
|
|
333
|
+
"input": 5.0,
|
|
334
|
+
"cache_read_input": 2.5,
|
|
335
|
+
"audio_input": 40.0,
|
|
336
|
+
"output": 20.0,
|
|
337
|
+
"audio_output": 80.0
|
|
338
|
+
},
|
|
339
|
+
"openai/gpt-4o-mini-realtime-preview": {
|
|
340
|
+
"input": 0.6,
|
|
341
|
+
"cache_read_input": 0.3,
|
|
342
|
+
"audio_input": 10.0,
|
|
343
|
+
"output": 2.4,
|
|
344
|
+
"audio_output": 20.0
|
|
345
|
+
},
|
|
346
|
+
"openai/gpt-realtime": {
|
|
347
|
+
"input": 4.0,
|
|
348
|
+
"cache_read_input": 0.4,
|
|
349
|
+
"audio_input": 32.0,
|
|
350
|
+
"output": 16.0,
|
|
351
|
+
"audio_output": 64.0
|
|
352
|
+
},
|
|
353
|
+
"openai/gpt-realtime-1.5": {
|
|
354
|
+
"input": 4.0,
|
|
355
|
+
"cache_read_input": 0.4,
|
|
356
|
+
"audio_input": 32.0,
|
|
357
|
+
"output": 16.0,
|
|
358
|
+
"audio_output": 64.0
|
|
359
|
+
},
|
|
360
|
+
"openai/gpt-realtime-mini": {
|
|
361
|
+
"input": 0.6,
|
|
362
|
+
"cache_read_input": 0.06,
|
|
363
|
+
"audio_input": 10.0,
|
|
364
|
+
"output": 2.4,
|
|
365
|
+
"audio_output": 20.0
|
|
366
|
+
},
|
|
367
|
+
"openai/gpt-audio-1.5": {
|
|
368
|
+
"input": 2.5,
|
|
369
|
+
"audio_input": 32.0,
|
|
370
|
+
"output": 10.0,
|
|
371
|
+
"audio_output": 64.0
|
|
372
|
+
},
|
|
373
|
+
"openai/gpt-audio-mini": {
|
|
374
|
+
"input": 0.6,
|
|
375
|
+
"audio_input": 10.0,
|
|
376
|
+
"output": 2.4,
|
|
377
|
+
"audio_output": 20.0
|
|
378
|
+
},
|
|
379
|
+
"openai/gpt-audio": {
|
|
380
|
+
"input": 2.5,
|
|
381
|
+
"audio_input": 32.0,
|
|
382
|
+
"output": 10.0,
|
|
383
|
+
"audio_output": 64.0
|
|
384
|
+
},
|
|
385
|
+
"openai/gpt-4o-audio-preview": {
|
|
386
|
+
"input": 2.5,
|
|
387
|
+
"audio_input": 40.0,
|
|
388
|
+
"output": 10.0,
|
|
389
|
+
"audio_output": 80.0
|
|
390
|
+
},
|
|
391
|
+
"openai/gpt-4o-mini-audio-preview": {
|
|
392
|
+
"input": 0.15,
|
|
393
|
+
"audio_input": 10.0,
|
|
394
|
+
"output": 0.6,
|
|
395
|
+
"audio_output": 20.0
|
|
396
|
+
},
|
|
312
397
|
"openai/gpt-5": {
|
|
313
398
|
"input": 1.25,
|
|
314
399
|
"output": 10.0,
|
|
@@ -672,7 +757,7 @@
|
|
|
672
757
|
"anthropic/claude-haiku-3-5": {
|
|
673
758
|
"input": 0.8,
|
|
674
759
|
"cache_write_input": 1.0,
|
|
675
|
-
"
|
|
760
|
+
"cache_write_extended_input": 1.6,
|
|
676
761
|
"cache_read_input": 0.08,
|
|
677
762
|
"output": 4.0,
|
|
678
763
|
"batch_input": 0.4,
|
|
@@ -681,7 +766,7 @@
|
|
|
681
766
|
"anthropic/claude-haiku-3": {
|
|
682
767
|
"input": 0.25,
|
|
683
768
|
"cache_write_input": 0.3,
|
|
684
|
-
"
|
|
769
|
+
"cache_write_extended_input": 0.5,
|
|
685
770
|
"cache_read_input": 0.03,
|
|
686
771
|
"output": 1.25,
|
|
687
772
|
"batch_input": 0.125,
|
|
@@ -1,18 +1,17 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "components"
|
|
3
|
+
require_relative "../billing/components"
|
|
4
4
|
|
|
5
5
|
module LlmCostTracker
|
|
6
6
|
module Pricing
|
|
7
7
|
module EffectivePrices
|
|
8
8
|
class << self
|
|
9
9
|
def call(usage:, prices:, pricing_mode:)
|
|
10
|
-
quantities = usage.price_quantities
|
|
11
10
|
context_tier = context_tier?(usage: usage, prices: prices)
|
|
12
11
|
|
|
13
|
-
|
|
14
|
-
price_key = component.
|
|
15
|
-
tokens =
|
|
12
|
+
Billing::Components::TOKEN_PRICED.to_h do |component|
|
|
13
|
+
price_key = component.key
|
|
14
|
+
tokens = usage.public_send(component.token_key)
|
|
16
15
|
price = if tokens.positive?
|
|
17
16
|
price_for(
|
|
18
17
|
prices: prices,
|
|
@@ -30,29 +29,67 @@ module LlmCostTracker
|
|
|
30
29
|
private
|
|
31
30
|
|
|
32
31
|
def price_for(prices:, key:, pricing_mode:, context_tier:)
|
|
33
|
-
|
|
34
|
-
return contextual_price(prices: prices, key: key, context_tier: context_tier) unless mode
|
|
32
|
+
return contextual_price(prices: prices, key: key, context_tier: context_tier) unless pricing_mode
|
|
35
33
|
|
|
36
|
-
|
|
37
|
-
|
|
34
|
+
orderings = mode_orderings_for(pricing_mode)
|
|
35
|
+
orderings.each do |mode|
|
|
36
|
+
direct = contextual_price(prices: prices, key: :"#{mode}_#{key}", context_tier: context_tier)
|
|
37
|
+
return direct if direct
|
|
38
|
+
end
|
|
39
|
+
return nil if %i[input output].include?(key)
|
|
40
|
+
|
|
41
|
+
derived_mode_price(prices: prices, key: key, modes: orderings, context_tier: context_tier)
|
|
38
42
|
end
|
|
39
43
|
|
|
44
|
+
def mode_orderings_for(pricing_mode)
|
|
45
|
+
mode_string = pricing_mode.to_s
|
|
46
|
+
return [mode_string] unless mode_string.include?("_")
|
|
47
|
+
|
|
48
|
+
tokens = tokenize_mode(mode_string)
|
|
49
|
+
return [mode_string] if tokens.size <= 1
|
|
50
|
+
|
|
51
|
+
[mode_string, *tokens.permutation.map { |permutation| permutation.join("_") }].uniq
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def tokenize_mode(mode_string)
|
|
55
|
+
remaining = mode_string.dup
|
|
56
|
+
tokens = []
|
|
57
|
+
loop do
|
|
58
|
+
break if remaining.empty?
|
|
59
|
+
|
|
60
|
+
compound = COMPOUND_MODE_TOKENS.find { |token| remaining == token || remaining.start_with?("#{token}_") }
|
|
61
|
+
if compound
|
|
62
|
+
tokens << compound
|
|
63
|
+
remaining = remaining.delete_prefix(compound).delete_prefix("_")
|
|
64
|
+
else
|
|
65
|
+
first, _, rest = remaining.partition("_")
|
|
66
|
+
tokens << first
|
|
67
|
+
remaining = rest
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
tokens
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
COMPOUND_MODE_TOKENS = %w[data_residency].freeze
|
|
74
|
+
private_constant :COMPOUND_MODE_TOKENS
|
|
75
|
+
|
|
40
76
|
def contextual_price(prices:, key:, context_tier:)
|
|
41
77
|
return prices[key] unless context_tier
|
|
42
78
|
|
|
43
79
|
prices[:"above_context_#{key}"]
|
|
44
80
|
end
|
|
45
81
|
|
|
46
|
-
def derived_mode_price(prices:, key:,
|
|
82
|
+
def derived_mode_price(prices:, key:, modes:, context_tier:)
|
|
47
83
|
standard_price = contextual_price(prices: prices, key: key, context_tier: context_tier)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
base_price = contextual_price(prices: prices, key: base_key, context_tier: context_tier)
|
|
52
|
-
mode_base_price = contextual_price(prices: prices, key: :"#{mode}_#{base_key}", context_tier: context_tier)
|
|
53
|
-
return nil unless base_price && mode_base_price
|
|
84
|
+
base_price = contextual_price(prices: prices, key: :input, context_tier: context_tier)
|
|
85
|
+
return nil unless standard_price && base_price
|
|
86
|
+
return nil if base_price.zero?
|
|
54
87
|
|
|
55
|
-
|
|
88
|
+
modes.each do |mode|
|
|
89
|
+
mode_base_price = contextual_price(prices: prices, key: :"#{mode}_input", context_tier: context_tier)
|
|
90
|
+
return standard_price * (mode_base_price / base_price) if mode_base_price
|
|
91
|
+
end
|
|
92
|
+
nil
|
|
56
93
|
end
|
|
57
94
|
|
|
58
95
|
def context_tier?(usage:, prices:)
|
|
@@ -62,8 +99,9 @@ module LlmCostTracker
|
|
|
62
99
|
input_tokens = usage.input_tokens +
|
|
63
100
|
usage.cache_read_input_tokens +
|
|
64
101
|
usage.cache_write_input_tokens +
|
|
65
|
-
usage.
|
|
66
|
-
|
|
102
|
+
usage.cache_write_extended_input_tokens +
|
|
103
|
+
usage.audio_input_tokens
|
|
104
|
+
input_tokens > threshold
|
|
67
105
|
end
|
|
68
106
|
end
|
|
69
107
|
end
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "../token_usage"
|
|
3
4
|
require_relative "effective_prices"
|
|
4
5
|
|
|
5
6
|
module LlmCostTracker
|
|
@@ -33,7 +34,7 @@ module LlmCostTracker
|
|
|
33
34
|
|
|
34
35
|
module Explainer
|
|
35
36
|
class << self
|
|
36
|
-
def call(provider:, model:,
|
|
37
|
+
def call(provider:, model:, tokens:, pricing_mode: nil)
|
|
37
38
|
match = Lookup.call(provider: provider, model: model)
|
|
38
39
|
|
|
39
40
|
explanation(
|
|
@@ -41,7 +42,7 @@ module LlmCostTracker
|
|
|
41
42
|
model: model,
|
|
42
43
|
pricing_mode: pricing_mode,
|
|
43
44
|
match: match,
|
|
44
|
-
usage:
|
|
45
|
+
usage: TokenUsage.build_from_tokens(tokens)
|
|
45
46
|
)
|
|
46
47
|
end
|
|
47
48
|
|
|
@@ -50,9 +51,7 @@ module LlmCostTracker
|
|
|
50
51
|
def explanation(provider:, model:, pricing_mode:, match:, usage:)
|
|
51
52
|
prices = match&.prices
|
|
52
53
|
pricing_mode = Pricing.normalize_mode(pricing_mode)
|
|
53
|
-
effective =
|
|
54
|
-
EffectivePrices.call(usage: usage, prices: prices, pricing_mode: pricing_mode)
|
|
55
|
-
end
|
|
54
|
+
effective = EffectivePrices.call(usage: usage, prices: prices, pricing_mode: pricing_mode) if prices
|
|
56
55
|
|
|
57
56
|
Explanation.new(
|
|
58
57
|
provider: provider.to_s,
|