llm_cost_tracker 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/README.md +11 -9
- data/app/assets/llm_cost_tracker/application.css +3 -0
- data/app/controllers/llm_cost_tracker/application_controller.rb +22 -4
- data/app/controllers/llm_cost_tracker/calls_controller.rb +6 -11
- data/app/controllers/llm_cost_tracker/dashboard_controller.rb +2 -1
- data/app/controllers/llm_cost_tracker/data_quality_controller.rb +5 -1
- data/app/controllers/llm_cost_tracker/models_controller.rb +0 -1
- data/app/controllers/llm_cost_tracker/tags_controller.rb +1 -8
- data/app/helpers/llm_cost_tracker/application_helper.rb +2 -1
- data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +1 -2
- data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +1 -1
- data/app/helpers/llm_cost_tracker/dashboard_query_helper.rb +10 -27
- data/app/helpers/llm_cost_tracker/token_usage_helper.rb +58 -0
- data/app/models/llm_cost_tracker/ingestion/event.rb +13 -0
- data/app/models/llm_cost_tracker/ingestion/lease.rb +11 -0
- data/app/models/llm_cost_tracker/ledger/call.rb +45 -0
- data/app/models/llm_cost_tracker/ledger/call_metrics.rb +66 -0
- data/app/models/llm_cost_tracker/ledger/period/grouping.rb +71 -0
- data/app/models/llm_cost_tracker/ledger/period/total.rb +13 -0
- data/app/models/llm_cost_tracker/ledger/tags/accessors.rb +19 -0
- data/app/services/llm_cost_tracker/dashboard/data_quality.rb +111 -94
- data/app/services/llm_cost_tracker/dashboard/date_range.rb +2 -2
- data/app/services/llm_cost_tracker/dashboard/filter.rb +7 -18
- data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +58 -67
- data/app/services/llm_cost_tracker/dashboard/pagination.rb +59 -0
- data/app/services/llm_cost_tracker/dashboard/params.rb +26 -0
- data/app/services/llm_cost_tracker/dashboard/provider_breakdown.rb +18 -20
- data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +4 -13
- data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +28 -61
- data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +8 -21
- data/app/services/llm_cost_tracker/dashboard/time_series.rb +1 -1
- data/app/services/llm_cost_tracker/dashboard/top_models.rb +12 -47
- data/app/views/llm_cost_tracker/calls/index.html.erb +12 -18
- data/app/views/llm_cost_tracker/calls/show.html.erb +30 -32
- data/app/views/llm_cost_tracker/dashboard/index.html.erb +17 -19
- data/app/views/llm_cost_tracker/data_quality/index.html.erb +108 -135
- data/app/views/llm_cost_tracker/models/index.html.erb +8 -9
- data/app/views/llm_cost_tracker/shared/setup_required.html.erb +13 -2
- data/app/views/llm_cost_tracker/tags/show.html.erb +20 -20
- data/lib/llm_cost_tracker/budget.rb +8 -20
- data/lib/llm_cost_tracker/capture/stream.rb +9 -0
- data/lib/llm_cost_tracker/capture/stream_collector.rb +182 -0
- data/lib/llm_cost_tracker/{integrations → capture}/stream_tracker.rb +40 -72
- data/lib/llm_cost_tracker/configuration/instrumentation.rb +3 -7
- data/lib/llm_cost_tracker/configuration.rb +28 -35
- data/lib/llm_cost_tracker/doctor/capture_verifier.rb +61 -0
- data/lib/llm_cost_tracker/doctor/check.rb +7 -0
- data/lib/llm_cost_tracker/doctor/ingestion_check.rb +22 -59
- data/lib/llm_cost_tracker/doctor/price_check.rb +60 -0
- data/lib/llm_cost_tracker/doctor.rb +63 -71
- data/lib/llm_cost_tracker/errors.rb +4 -15
- data/lib/llm_cost_tracker/event.rb +6 -6
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_token_usage_generator.rb +42 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +2 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +7 -7
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +3 -3
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_token_usage_to_llm_api_calls.rb.erb +22 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +9 -14
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +0 -4
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +12 -1
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +2 -2
- data/lib/llm_cost_tracker/{storage/active_record_inbox_batch.rb → ingestion/batch.rb} +21 -20
- data/lib/llm_cost_tracker/ingestion/inbox.rb +105 -0
- data/lib/llm_cost_tracker/{storage/active_record_ingestor_lease.rb → ingestion/lease_claim.rb} +5 -7
- data/lib/llm_cost_tracker/{storage/active_record_ingestor.rb → ingestion/worker.rb} +38 -48
- data/lib/llm_cost_tracker/ingestion.rb +129 -0
- data/lib/llm_cost_tracker/integrations/anthropic.rb +52 -34
- data/lib/llm_cost_tracker/integrations/base.rb +73 -34
- data/lib/llm_cost_tracker/integrations/openai.rb +45 -39
- data/lib/llm_cost_tracker/integrations/ruby_llm.rb +40 -30
- data/lib/llm_cost_tracker/integrations.rb +43 -0
- data/lib/llm_cost_tracker/ledger/period/totals.rb +66 -0
- data/lib/llm_cost_tracker/{storage/active_record_periods.rb → ledger/period.rb} +2 -2
- data/lib/llm_cost_tracker/ledger/rollups/batch.rb +43 -0
- data/lib/llm_cost_tracker/ledger/rollups/upsert_sql.rb +46 -0
- data/lib/llm_cost_tracker/ledger/rollups.rb +87 -0
- data/lib/llm_cost_tracker/ledger/schema/adapter.rb +51 -0
- data/lib/llm_cost_tracker/ledger/schema/calls.rb +101 -0
- data/lib/llm_cost_tracker/ledger/schema/period_totals.rb +32 -0
- data/lib/llm_cost_tracker/ledger/store.rb +60 -0
- data/lib/llm_cost_tracker/ledger/tags/query.rb +29 -0
- data/lib/llm_cost_tracker/ledger/tags/sql.rb +33 -0
- data/lib/llm_cost_tracker/ledger.rb +13 -0
- data/lib/llm_cost_tracker/logging.rb +3 -6
- data/lib/llm_cost_tracker/middleware/faraday.rb +35 -36
- data/lib/llm_cost_tracker/parsers/anthropic.rb +38 -27
- data/lib/llm_cost_tracker/parsers/base.rb +10 -19
- data/lib/llm_cost_tracker/parsers/gemini.rb +15 -16
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +24 -19
- data/lib/llm_cost_tracker/parsers/sse.rb +4 -7
- data/lib/llm_cost_tracker/parsers.rb +20 -0
- data/lib/llm_cost_tracker/prices.json +52 -11
- data/lib/llm_cost_tracker/pricing/components.rb +37 -0
- data/lib/llm_cost_tracker/pricing/effective_prices.rb +40 -50
- data/lib/llm_cost_tracker/pricing/explainer.rb +12 -23
- data/lib/llm_cost_tracker/pricing/lookup.rb +24 -25
- data/lib/llm_cost_tracker/pricing/registry.rb +156 -0
- data/lib/llm_cost_tracker/pricing/sync/fetcher.rb +107 -0
- data/lib/llm_cost_tracker/pricing/sync/registry_diff.rb +53 -0
- data/lib/llm_cost_tracker/pricing/sync/registry_loader.rb +63 -0
- data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +31 -0
- data/lib/llm_cost_tracker/pricing/sync.rb +143 -0
- data/lib/llm_cost_tracker/pricing/unknown.rb +46 -0
- data/lib/llm_cost_tracker/pricing.rb +33 -32
- data/lib/llm_cost_tracker/railtie.rb +7 -8
- data/lib/llm_cost_tracker/report/data.rb +72 -0
- data/lib/llm_cost_tracker/report/formatter.rb +69 -0
- data/lib/llm_cost_tracker/report.rb +8 -8
- data/lib/llm_cost_tracker/retention.rb +27 -10
- data/lib/llm_cost_tracker/tags/context.rb +35 -0
- data/lib/llm_cost_tracker/tags/key.rb +18 -0
- data/lib/llm_cost_tracker/tags/sanitizer.rb +68 -0
- data/lib/llm_cost_tracker/token_usage.rb +67 -0
- data/lib/llm_cost_tracker/tracker.rb +38 -70
- data/lib/llm_cost_tracker/usage_capture.rb +37 -0
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +56 -78
- data/lib/tasks/llm_cost_tracker.rake +18 -13
- metadata +54 -58
- data/app/services/llm_cost_tracker/dashboard/data_quality_aggregate.rb +0 -81
- data/app/services/llm_cost_tracker/pagination.rb +0 -57
- data/lib/llm_cost_tracker/active_record_adapter.rb +0 -53
- data/lib/llm_cost_tracker/capture_verifier.rb +0 -64
- data/lib/llm_cost_tracker/cost.rb +0 -12
- data/lib/llm_cost_tracker/doctor/capture_check.rb +0 -39
- data/lib/llm_cost_tracker/event_metadata.rb +0 -52
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_usage_breakdown_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_usage_breakdown_to_llm_api_calls.rb.erb +0 -29
- data/lib/llm_cost_tracker/inbox_event.rb +0 -9
- data/lib/llm_cost_tracker/ingestor_lease.rb +0 -9
- data/lib/llm_cost_tracker/integrations/object_reader.rb +0 -56
- data/lib/llm_cost_tracker/integrations/registry.rb +0 -71
- data/lib/llm_cost_tracker/llm_api_call.rb +0 -60
- data/lib/llm_cost_tracker/llm_api_call_metrics.rb +0 -63
- data/lib/llm_cost_tracker/parameter_hash.rb +0 -33
- data/lib/llm_cost_tracker/parsed_usage.rb +0 -72
- data/lib/llm_cost_tracker/parsers/registry.rb +0 -58
- data/lib/llm_cost_tracker/period_grouping.rb +0 -67
- data/lib/llm_cost_tracker/period_total.rb +0 -9
- data/lib/llm_cost_tracker/price_freshness.rb +0 -38
- data/lib/llm_cost_tracker/price_registry.rb +0 -144
- data/lib/llm_cost_tracker/price_sync/fetcher.rb +0 -104
- data/lib/llm_cost_tracker/price_sync/registry_diff.rb +0 -51
- data/lib/llm_cost_tracker/price_sync/registry_loader.rb +0 -61
- data/lib/llm_cost_tracker/price_sync/registry_writer.rb +0 -29
- data/lib/llm_cost_tracker/price_sync.rb +0 -144
- data/lib/llm_cost_tracker/report_data.rb +0 -94
- data/lib/llm_cost_tracker/report_formatter.rb +0 -67
- data/lib/llm_cost_tracker/request_url.rb +0 -20
- data/lib/llm_cost_tracker/storage/active_record_backend.rb +0 -167
- data/lib/llm_cost_tracker/storage/active_record_connection_cleanup.rb +0 -13
- data/lib/llm_cost_tracker/storage/active_record_inbox.rb +0 -160
- data/lib/llm_cost_tracker/storage/active_record_period_totals.rb +0 -84
- data/lib/llm_cost_tracker/storage/active_record_rollup_batch.rb +0 -41
- data/lib/llm_cost_tracker/storage/active_record_rollup_upsert_sql.rb +0 -42
- data/lib/llm_cost_tracker/storage/active_record_rollups.rb +0 -146
- data/lib/llm_cost_tracker/storage/active_record_store.rb +0 -145
- data/lib/llm_cost_tracker/storage/writer.rb +0 -35
- data/lib/llm_cost_tracker/stream_capture.rb +0 -7
- data/lib/llm_cost_tracker/stream_collector.rb +0 -199
- data/lib/llm_cost_tracker/tag_accessors.rb +0 -15
- data/lib/llm_cost_tracker/tag_context.rb +0 -52
- data/lib/llm_cost_tracker/tag_key.rb +0 -16
- data/lib/llm_cost_tracker/tag_query.rb +0 -43
- data/lib/llm_cost_tracker/tag_sanitizer.rb +0 -81
- data/lib/llm_cost_tracker/tag_sql.rb +0 -34
- data/lib/llm_cost_tracker/tags_column.rb +0 -105
- data/lib/llm_cost_tracker/unknown_pricing.rb +0 -54
- data/lib/llm_cost_tracker/usage_breakdown.rb +0 -30
- data/lib/llm_cost_tracker/value_helpers.rb +0 -40
|
@@ -24,17 +24,13 @@ module LlmCostTracker
|
|
|
24
24
|
|
|
25
25
|
request = safe_json_parse(request_body)
|
|
26
26
|
cache_read = usage["cache_read_input_tokens"].to_i
|
|
27
|
-
cache_write = usage["cache_creation_input_tokens"].to_i
|
|
28
27
|
|
|
29
|
-
|
|
28
|
+
UsageCapture.build(
|
|
30
29
|
provider: "anthropic",
|
|
31
30
|
provider_response_id: response["id"],
|
|
31
|
+
pricing_mode: pricing_mode(request, response, usage),
|
|
32
32
|
model: response["model"] || request["model"],
|
|
33
|
-
|
|
34
|
-
output_tokens: usage["output_tokens"].to_i,
|
|
35
|
-
total_tokens: usage["input_tokens"].to_i + usage["output_tokens"].to_i + cache_read + cache_write,
|
|
36
|
-
cache_read_input_tokens: usage["cache_read_input_tokens"],
|
|
37
|
-
cache_write_input_tokens: usage["cache_creation_input_tokens"],
|
|
33
|
+
token_usage: token_usage(usage, cache_read),
|
|
38
34
|
usage_source: :response
|
|
39
35
|
)
|
|
40
36
|
end
|
|
@@ -43,17 +39,18 @@ module LlmCostTracker
|
|
|
43
39
|
return nil unless response_status == 200
|
|
44
40
|
|
|
45
41
|
request = safe_json_parse(request_body)
|
|
46
|
-
model =
|
|
42
|
+
model = find_event_value(events) { |data| data.dig("message", "model") } || request["model"]
|
|
47
43
|
usage = stream_usage(events)
|
|
48
|
-
response_id =
|
|
44
|
+
response_id = find_event_value(events) { |data| data.dig("message", "id") || data["id"] }
|
|
49
45
|
|
|
50
46
|
if usage
|
|
51
|
-
build_stream_result(model, usage, response_id)
|
|
47
|
+
build_stream_result(model, usage, response_id, pricing_mode(request, nil, usage))
|
|
52
48
|
else
|
|
53
49
|
build_unknown_stream_usage(
|
|
54
50
|
provider: "anthropic",
|
|
55
51
|
model: model,
|
|
56
|
-
provider_response_id: response_id
|
|
52
|
+
provider_response_id: response_id,
|
|
53
|
+
pricing_mode: pricing_mode(request, nil, usage)
|
|
57
54
|
)
|
|
58
55
|
end
|
|
59
56
|
end
|
|
@@ -75,33 +72,47 @@ module LlmCostTracker
|
|
|
75
72
|
end
|
|
76
73
|
end
|
|
77
74
|
|
|
78
|
-
def
|
|
79
|
-
|
|
80
|
-
end
|
|
75
|
+
def build_stream_result(model, usage, response_id, pricing_mode)
|
|
76
|
+
cache_read = usage["cache_read_input_tokens"].to_i
|
|
81
77
|
|
|
82
|
-
|
|
83
|
-
|
|
78
|
+
UsageCapture.build(
|
|
79
|
+
provider: "anthropic",
|
|
80
|
+
provider_response_id: response_id,
|
|
81
|
+
pricing_mode: pricing_mode,
|
|
82
|
+
model: model,
|
|
83
|
+
token_usage: token_usage(usage, cache_read),
|
|
84
|
+
stream: true,
|
|
85
|
+
usage_source: :stream_final
|
|
86
|
+
)
|
|
84
87
|
end
|
|
85
88
|
|
|
86
|
-
def
|
|
89
|
+
def token_usage(usage, cache_read)
|
|
87
90
|
input = usage["input_tokens"].to_i
|
|
88
91
|
output = usage["output_tokens"].to_i
|
|
89
|
-
|
|
90
|
-
|
|
92
|
+
cache_creation = usage["cache_creation"]
|
|
93
|
+
if cache_creation.is_a?(Hash)
|
|
94
|
+
cache_write = cache_creation["ephemeral_5m_input_tokens"].to_i
|
|
95
|
+
cache_write_1h = cache_creation["ephemeral_1h_input_tokens"].to_i
|
|
96
|
+
else
|
|
97
|
+
cache_write = usage["cache_creation_input_tokens"].to_i
|
|
98
|
+
cache_write_1h = 0
|
|
99
|
+
end
|
|
91
100
|
|
|
92
|
-
|
|
93
|
-
provider: "anthropic",
|
|
94
|
-
provider_response_id: response_id,
|
|
95
|
-
model: model,
|
|
101
|
+
TokenUsage.build(
|
|
96
102
|
input_tokens: input,
|
|
97
103
|
output_tokens: output,
|
|
98
|
-
total_tokens: input + output + cache_read + cache_write,
|
|
104
|
+
total_tokens: input + output + cache_read + cache_write + cache_write_1h,
|
|
99
105
|
cache_read_input_tokens: usage["cache_read_input_tokens"],
|
|
100
|
-
cache_write_input_tokens:
|
|
101
|
-
|
|
102
|
-
usage_source: :stream_final
|
|
106
|
+
cache_write_input_tokens: cache_write,
|
|
107
|
+
cache_write_1h_input_tokens: cache_write_1h
|
|
103
108
|
)
|
|
104
109
|
end
|
|
110
|
+
|
|
111
|
+
def pricing_mode(request, response, usage)
|
|
112
|
+
usage&.fetch("service_tier", nil) ||
|
|
113
|
+
response&.fetch("service_tier", nil) ||
|
|
114
|
+
request["service_tier"]
|
|
115
|
+
end
|
|
105
116
|
end
|
|
106
117
|
end
|
|
107
118
|
end
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "active_support/core_ext/object/blank"
|
|
3
4
|
require "json"
|
|
4
5
|
require "uri"
|
|
5
6
|
|
|
@@ -19,10 +20,9 @@ module LlmCostTracker
|
|
|
19
20
|
end
|
|
20
21
|
|
|
21
22
|
def streaming_request?(_request_url, request_body)
|
|
22
|
-
return false if request_body.
|
|
23
|
+
return false if request_body.blank?
|
|
23
24
|
|
|
24
25
|
body = request_body.to_s
|
|
25
|
-
return false if body.empty?
|
|
26
26
|
|
|
27
27
|
request = safe_json_parse(body)
|
|
28
28
|
request.is_a?(Hash) && request["stream"] == true
|
|
@@ -35,7 +35,7 @@ module LlmCostTracker
|
|
|
35
35
|
private
|
|
36
36
|
|
|
37
37
|
def safe_json_parse(body)
|
|
38
|
-
return {} if body.
|
|
38
|
+
return {} if body.blank?
|
|
39
39
|
|
|
40
40
|
JSON.parse(body)
|
|
41
41
|
rescue JSON::ParserError
|
|
@@ -49,7 +49,7 @@ module LlmCostTracker
|
|
|
49
49
|
|
|
50
50
|
def match_uri?(url, hosts: nil, exact_paths: nil, path_includes: nil, path_suffixes: nil, path_pattern: nil)
|
|
51
51
|
uri_matches?(url) do |uri|
|
|
52
|
-
host_match = hosts.nil? ||
|
|
52
|
+
host_match = hosts.nil? || hosts.include?(uri.host.to_s.downcase)
|
|
53
53
|
path_match = path_matches?(
|
|
54
54
|
uri,
|
|
55
55
|
exact_paths: exact_paths,
|
|
@@ -69,10 +69,6 @@ module LlmCostTracker
|
|
|
69
69
|
nil
|
|
70
70
|
end
|
|
71
71
|
|
|
72
|
-
def host_matches?(uri, hosts)
|
|
73
|
-
hosts.include?(uri.host.to_s.downcase)
|
|
74
|
-
end
|
|
75
|
-
|
|
76
72
|
def path_matches?(uri, exact_paths: nil, path_includes: nil, path_suffixes: nil, path_pattern: nil)
|
|
77
73
|
path = uri.path.to_s
|
|
78
74
|
matches = true
|
|
@@ -98,28 +94,23 @@ module LlmCostTracker
|
|
|
98
94
|
def find_event_value(events, reverse: false)
|
|
99
95
|
each_event_data(events, reverse:) do |data|
|
|
100
96
|
value = yield(data)
|
|
101
|
-
return value if
|
|
97
|
+
return value if value.present?
|
|
102
98
|
end
|
|
103
99
|
|
|
104
100
|
nil
|
|
105
101
|
end
|
|
106
102
|
|
|
107
|
-
def build_unknown_stream_usage(provider:, model:, provider_response_id:)
|
|
108
|
-
|
|
103
|
+
def build_unknown_stream_usage(provider:, model:, provider_response_id:, pricing_mode: nil)
|
|
104
|
+
UsageCapture.build(
|
|
109
105
|
provider: provider,
|
|
110
106
|
provider_response_id: provider_response_id,
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
output_tokens: 0,
|
|
114
|
-
total_tokens: 0,
|
|
107
|
+
pricing_mode: pricing_mode,
|
|
108
|
+
model: model || UsageCapture::UNKNOWN_MODEL,
|
|
109
|
+
token_usage: TokenUsage.build(input_tokens: 0, output_tokens: 0, total_tokens: 0),
|
|
115
110
|
stream: true,
|
|
116
111
|
usage_source: :unknown
|
|
117
112
|
)
|
|
118
113
|
end
|
|
119
|
-
|
|
120
|
-
def event_value_present?(value)
|
|
121
|
-
!value.nil? && (!value.respond_to?(:empty?) || !value.empty?)
|
|
122
|
-
end
|
|
123
114
|
end
|
|
124
115
|
end
|
|
125
116
|
end
|
|
@@ -18,7 +18,7 @@ module LlmCostTracker
|
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
def streaming_request?(request_url, request_body)
|
|
21
|
-
return true if
|
|
21
|
+
return true if match_uri?(request_url, path_pattern: STREAM_PATH_PATTERN)
|
|
22
22
|
|
|
23
23
|
super
|
|
24
24
|
end
|
|
@@ -30,7 +30,7 @@ module LlmCostTracker
|
|
|
30
30
|
usage = response["usageMetadata"]
|
|
31
31
|
return nil unless usage
|
|
32
32
|
|
|
33
|
-
|
|
33
|
+
build_usage_capture(
|
|
34
34
|
request_url,
|
|
35
35
|
usage,
|
|
36
36
|
usage_source: :response,
|
|
@@ -46,7 +46,7 @@ module LlmCostTracker
|
|
|
46
46
|
response_id = stream_response_id(events)
|
|
47
47
|
|
|
48
48
|
if usage
|
|
49
|
-
|
|
49
|
+
build_usage_capture(
|
|
50
50
|
request_url,
|
|
51
51
|
usage,
|
|
52
52
|
stream: true,
|
|
@@ -64,17 +64,20 @@ module LlmCostTracker
|
|
|
64
64
|
|
|
65
65
|
private
|
|
66
66
|
|
|
67
|
-
def
|
|
67
|
+
def build_usage_capture(request_url, usage, usage_source:, stream: false, provider_response_id: nil)
|
|
68
68
|
cache_read = usage["cachedContentTokenCount"].to_i
|
|
69
|
+
tool_use_prompt = usage["toolUsePromptTokenCount"].to_i
|
|
69
70
|
|
|
70
|
-
|
|
71
|
+
UsageCapture.build(
|
|
71
72
|
provider: "gemini",
|
|
72
73
|
model: extract_model_from_url(request_url),
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
74
|
+
token_usage: TokenUsage.build(
|
|
75
|
+
input_tokens: [usage["promptTokenCount"].to_i - cache_read, 0].max + tool_use_prompt,
|
|
76
|
+
output_tokens: output_tokens(usage),
|
|
77
|
+
total_tokens: total_tokens(usage, cache_read, tool_use_prompt),
|
|
78
|
+
cache_read_input_tokens: usage["cachedContentTokenCount"],
|
|
79
|
+
hidden_output_tokens: usage["thoughtsTokenCount"]
|
|
80
|
+
),
|
|
78
81
|
stream: stream,
|
|
79
82
|
usage_source: usage_source,
|
|
80
83
|
provider_response_id: provider_response_id
|
|
@@ -92,21 +95,17 @@ module LlmCostTracker
|
|
|
92
95
|
usage["candidatesTokenCount"].to_i + usage["thoughtsTokenCount"].to_i
|
|
93
96
|
end
|
|
94
97
|
|
|
95
|
-
def total_tokens(usage, cache_read)
|
|
98
|
+
def total_tokens(usage, cache_read, tool_use_prompt)
|
|
96
99
|
total = usage["totalTokenCount"]
|
|
97
100
|
return total.to_i unless total.nil?
|
|
98
101
|
|
|
99
|
-
[usage["promptTokenCount"].to_i - cache_read, 0].max + cache_read + output_tokens(usage)
|
|
102
|
+
[usage["promptTokenCount"].to_i - cache_read, 0].max + cache_read + tool_use_prompt + output_tokens(usage)
|
|
100
103
|
end
|
|
101
104
|
|
|
102
105
|
def stream_response_id(events)
|
|
103
106
|
find_event_value(events) { |data| data["responseId"] }
|
|
104
107
|
end
|
|
105
108
|
|
|
106
|
-
def streaming_url?(request_url)
|
|
107
|
-
match_uri?(request_url, path_pattern: STREAM_PATH_PATTERN)
|
|
108
|
-
end
|
|
109
|
-
|
|
110
109
|
def extract_model_from_url(url)
|
|
111
110
|
uri = parsed_uri(url)
|
|
112
111
|
return nil unless uri
|
|
@@ -15,15 +15,12 @@ module LlmCostTracker
|
|
|
15
15
|
request = safe_json_parse(request_body)
|
|
16
16
|
cache_read = cache_read_input_tokens(usage)
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
UsageCapture.build(
|
|
19
19
|
provider: provider_for(request_url),
|
|
20
20
|
provider_response_id: response["id"],
|
|
21
|
+
pricing_mode: response["service_tier"] || request["service_tier"],
|
|
21
22
|
model: response["model"] || request["model"],
|
|
22
|
-
|
|
23
|
-
output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
|
|
24
|
-
total_tokens: total_tokens(usage, cache_read),
|
|
25
|
-
cache_read_input_tokens: cache_read,
|
|
26
|
-
hidden_output_tokens: hidden_output_tokens(usage),
|
|
23
|
+
token_usage: token_usage(usage, cache_read),
|
|
27
24
|
usage_source: :response
|
|
28
25
|
)
|
|
29
26
|
end
|
|
@@ -32,21 +29,20 @@ module LlmCostTracker
|
|
|
32
29
|
return nil unless response_status == 200
|
|
33
30
|
|
|
34
31
|
request = safe_json_parse(request_body)
|
|
35
|
-
model =
|
|
32
|
+
model =
|
|
33
|
+
find_event_value(events) { |data| data["model"] || data.dig("response", "model") } || request["model"]
|
|
36
34
|
usage = detect_stream_usage(events)
|
|
37
|
-
response_id =
|
|
35
|
+
response_id = find_event_value(events) { |data| data["id"] || data.dig("response", "id") }
|
|
36
|
+
pricing_mode = stream_pricing_mode(events) || request["service_tier"]
|
|
38
37
|
|
|
39
38
|
if usage
|
|
40
39
|
cache_read = cache_read_input_tokens(usage)
|
|
41
|
-
|
|
40
|
+
UsageCapture.build(
|
|
42
41
|
provider: provider_for(request_url),
|
|
43
42
|
provider_response_id: response_id,
|
|
43
|
+
pricing_mode: pricing_mode,
|
|
44
44
|
model: model,
|
|
45
|
-
|
|
46
|
-
output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
|
|
47
|
-
total_tokens: total_tokens(usage, cache_read),
|
|
48
|
-
cache_read_input_tokens: cache_read,
|
|
49
|
-
hidden_output_tokens: hidden_output_tokens(usage),
|
|
45
|
+
token_usage: token_usage(usage, cache_read),
|
|
50
46
|
stream: true,
|
|
51
47
|
usage_source: :stream_final
|
|
52
48
|
)
|
|
@@ -54,7 +50,8 @@ module LlmCostTracker
|
|
|
54
50
|
build_unknown_stream_usage(
|
|
55
51
|
provider: provider_for(request_url),
|
|
56
52
|
model: model,
|
|
57
|
-
provider_response_id: response_id
|
|
53
|
+
provider_response_id: response_id,
|
|
54
|
+
pricing_mode: pricing_mode
|
|
58
55
|
)
|
|
59
56
|
end
|
|
60
57
|
end
|
|
@@ -66,12 +63,20 @@ module LlmCostTracker
|
|
|
66
63
|
end
|
|
67
64
|
end
|
|
68
65
|
|
|
69
|
-
def
|
|
70
|
-
find_event_value(events)
|
|
66
|
+
def stream_pricing_mode(events)
|
|
67
|
+
find_event_value(events, reverse: true) do |data|
|
|
68
|
+
data["service_tier"] || data.dig("response", "service_tier")
|
|
69
|
+
end
|
|
71
70
|
end
|
|
72
71
|
|
|
73
|
-
def
|
|
74
|
-
|
|
72
|
+
def token_usage(usage, cache_read)
|
|
73
|
+
TokenUsage.build(
|
|
74
|
+
input_tokens: regular_input_tokens(usage, cache_read),
|
|
75
|
+
output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
|
|
76
|
+
total_tokens: total_tokens(usage, cache_read),
|
|
77
|
+
cache_read_input_tokens: cache_read,
|
|
78
|
+
hidden_output_tokens: hidden_output_tokens(usage)
|
|
79
|
+
)
|
|
75
80
|
end
|
|
76
81
|
|
|
77
82
|
def regular_input_tokens(usage, cache_read)
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "active_support/core_ext/object/blank"
|
|
3
4
|
require "json"
|
|
4
5
|
|
|
5
6
|
module LlmCostTracker
|
|
@@ -9,9 +10,9 @@ module LlmCostTracker
|
|
|
9
10
|
|
|
10
11
|
class << self
|
|
11
12
|
def parse(body)
|
|
12
|
-
return [] if body.
|
|
13
|
+
return [] if body.blank?
|
|
13
14
|
|
|
14
|
-
return parse_json_array(body) if
|
|
15
|
+
return parse_json_array(body) if body.lstrip.start_with?("[")
|
|
15
16
|
|
|
16
17
|
parse_event_stream(body)
|
|
17
18
|
end
|
|
@@ -65,16 +66,12 @@ module LlmCostTracker
|
|
|
65
66
|
end
|
|
66
67
|
|
|
67
68
|
def decode_data(payload)
|
|
68
|
-
return payload if payload.
|
|
69
|
+
return payload if payload.blank?
|
|
69
70
|
|
|
70
71
|
JSON.parse(payload)
|
|
71
72
|
rescue JSON::ParserError
|
|
72
73
|
payload
|
|
73
74
|
end
|
|
74
|
-
|
|
75
|
-
def probably_json_array?(body)
|
|
76
|
-
body.lstrip.start_with?("[")
|
|
77
|
-
end
|
|
78
75
|
end
|
|
79
76
|
end
|
|
80
77
|
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
|
|
4
|
+
module Parsers
|
|
5
|
+
BUILT_INS = [Openai.new, OpenaiCompatible.new, Anthropic.new, Gemini.new].freeze
|
|
6
|
+
|
|
7
|
+
module_function
|
|
8
|
+
|
|
9
|
+
def find_for(url)
|
|
10
|
+
BUILT_INS.find { |parser| parser.match?(url) }
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def find_for_provider(provider)
|
|
14
|
+
provider_name = provider.to_s.downcase
|
|
15
|
+
BUILT_INS.find do |parser|
|
|
16
|
+
Array(parser.provider_names).map { |name| name.to_s.downcase }.include?(provider_name)
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
"output": 5.0,
|
|
18
18
|
"cache_read_input": 0.1,
|
|
19
19
|
"cache_write_input": 1.25,
|
|
20
|
+
"cache_write_1h_input": 2.0,
|
|
20
21
|
"batch_input": 0.5,
|
|
21
22
|
"batch_output": 2.5
|
|
22
23
|
},
|
|
@@ -25,6 +26,7 @@
|
|
|
25
26
|
"output": 75.0,
|
|
26
27
|
"cache_read_input": 1.5,
|
|
27
28
|
"cache_write_input": 18.75,
|
|
29
|
+
"cache_write_1h_input": 30.0,
|
|
28
30
|
"batch_input": 7.5,
|
|
29
31
|
"batch_output": 37.5
|
|
30
32
|
},
|
|
@@ -33,6 +35,7 @@
|
|
|
33
35
|
"output": 75.0,
|
|
34
36
|
"cache_read_input": 1.5,
|
|
35
37
|
"cache_write_input": 18.75,
|
|
38
|
+
"cache_write_1h_input": 30.0,
|
|
36
39
|
"batch_input": 7.5,
|
|
37
40
|
"batch_output": 37.5
|
|
38
41
|
},
|
|
@@ -41,6 +44,7 @@
|
|
|
41
44
|
"output": 25.0,
|
|
42
45
|
"cache_read_input": 0.5,
|
|
43
46
|
"cache_write_input": 6.25,
|
|
47
|
+
"cache_write_1h_input": 10.0,
|
|
44
48
|
"batch_input": 2.5,
|
|
45
49
|
"batch_output": 12.5
|
|
46
50
|
},
|
|
@@ -49,6 +53,7 @@
|
|
|
49
53
|
"output": 25.0,
|
|
50
54
|
"cache_read_input": 0.5,
|
|
51
55
|
"cache_write_input": 6.25,
|
|
56
|
+
"cache_write_1h_input": 10.0,
|
|
52
57
|
"batch_input": 2.5,
|
|
53
58
|
"batch_output": 12.5
|
|
54
59
|
},
|
|
@@ -57,6 +62,7 @@
|
|
|
57
62
|
"output": 25.0,
|
|
58
63
|
"cache_read_input": 0.5,
|
|
59
64
|
"cache_write_input": 6.25,
|
|
65
|
+
"cache_write_1h_input": 10.0,
|
|
60
66
|
"batch_input": 2.5,
|
|
61
67
|
"batch_output": 12.5
|
|
62
68
|
},
|
|
@@ -65,6 +71,7 @@
|
|
|
65
71
|
"output": 15.0,
|
|
66
72
|
"cache_read_input": 0.3,
|
|
67
73
|
"cache_write_input": 3.75,
|
|
74
|
+
"cache_write_1h_input": 6.0,
|
|
68
75
|
"batch_input": 1.5,
|
|
69
76
|
"batch_output": 7.5
|
|
70
77
|
},
|
|
@@ -73,6 +80,7 @@
|
|
|
73
80
|
"output": 15.0,
|
|
74
81
|
"cache_read_input": 0.3,
|
|
75
82
|
"cache_write_input": 3.75,
|
|
83
|
+
"cache_write_1h_input": 6.0,
|
|
76
84
|
"batch_input": 1.5,
|
|
77
85
|
"batch_output": 7.5
|
|
78
86
|
},
|
|
@@ -81,6 +89,7 @@
|
|
|
81
89
|
"output": 15.0,
|
|
82
90
|
"cache_read_input": 0.3,
|
|
83
91
|
"cache_write_input": 3.75,
|
|
92
|
+
"cache_write_1h_input": 6.0,
|
|
84
93
|
"batch_input": 1.5,
|
|
85
94
|
"batch_output": 7.5
|
|
86
95
|
},
|
|
@@ -89,7 +98,8 @@
|
|
|
89
98
|
"cache_read_input": 0.025,
|
|
90
99
|
"output": 0.4,
|
|
91
100
|
"batch_input": 0.05,
|
|
92
|
-
"batch_output": 0.2
|
|
101
|
+
"batch_output": 0.2,
|
|
102
|
+
"batch_cache_read_input": 0.025
|
|
93
103
|
},
|
|
94
104
|
"gemini/gemini-2.0-flash-lite": {
|
|
95
105
|
"input": 0.075,
|
|
@@ -101,25 +111,32 @@
|
|
|
101
111
|
"input": 0.3,
|
|
102
112
|
"output": 2.5,
|
|
103
113
|
"cache_read_input": 0.03,
|
|
104
|
-
"cache_write_input": 0.083333333333,
|
|
105
114
|
"batch_input": 0.15,
|
|
106
|
-
"batch_output": 1.25
|
|
115
|
+
"batch_output": 1.25,
|
|
116
|
+
"batch_cache_read_input": 0.03
|
|
107
117
|
},
|
|
108
118
|
"gemini/gemini-2.5-flash-lite": {
|
|
109
119
|
"input": 0.1,
|
|
110
120
|
"output": 0.4,
|
|
111
121
|
"cache_read_input": 0.01,
|
|
112
|
-
"cache_write_input": 0.083333333333,
|
|
113
122
|
"batch_input": 0.05,
|
|
114
|
-
"batch_output": 0.2
|
|
123
|
+
"batch_output": 0.2,
|
|
124
|
+
"batch_cache_read_input": 0.01
|
|
115
125
|
},
|
|
116
126
|
"gemini/gemini-2.5-pro": {
|
|
117
127
|
"input": 1.25,
|
|
118
128
|
"output": 10.0,
|
|
119
129
|
"cache_read_input": 0.125,
|
|
120
|
-
"cache_write_input": 0.375,
|
|
121
130
|
"batch_input": 0.625,
|
|
122
|
-
"batch_output": 5.0
|
|
131
|
+
"batch_output": 5.0,
|
|
132
|
+
"batch_cache_read_input": 0.125,
|
|
133
|
+
"_context_price_threshold_tokens": 200000,
|
|
134
|
+
"above_context_input": 2.5,
|
|
135
|
+
"above_context_output": 15.0,
|
|
136
|
+
"above_context_cache_read_input": 0.25,
|
|
137
|
+
"above_context_batch_input": 1.25,
|
|
138
|
+
"above_context_batch_output": 7.5,
|
|
139
|
+
"above_context_batch_cache_read_input": 0.25
|
|
123
140
|
},
|
|
124
141
|
"openai/gpt-3.5-turbo": {
|
|
125
142
|
"input": 0.5,
|
|
@@ -276,7 +293,14 @@
|
|
|
276
293
|
"cache_read_input": 0.25,
|
|
277
294
|
"batch_input": 1.25,
|
|
278
295
|
"batch_output": 7.5,
|
|
279
|
-
"batch_cache_read_input": 0.13
|
|
296
|
+
"batch_cache_read_input": 0.13,
|
|
297
|
+
"_context_price_threshold_tokens": 272000,
|
|
298
|
+
"above_context_input": 5.0,
|
|
299
|
+
"above_context_output": 22.5,
|
|
300
|
+
"above_context_cache_read_input": 0.5,
|
|
301
|
+
"above_context_batch_input": 2.5,
|
|
302
|
+
"above_context_batch_output": 11.25,
|
|
303
|
+
"above_context_batch_cache_read_input": 0.25
|
|
280
304
|
},
|
|
281
305
|
"openai/gpt-5.4-mini": {
|
|
282
306
|
"input": 0.75,
|
|
@@ -298,7 +322,12 @@
|
|
|
298
322
|
"input": 30.0,
|
|
299
323
|
"output": 180.0,
|
|
300
324
|
"batch_input": 15.0,
|
|
301
|
-
"batch_output": 90.0
|
|
325
|
+
"batch_output": 90.0,
|
|
326
|
+
"_context_price_threshold_tokens": 272000,
|
|
327
|
+
"above_context_input": 60.0,
|
|
328
|
+
"above_context_output": 270.0,
|
|
329
|
+
"above_context_batch_input": 30.0,
|
|
330
|
+
"above_context_batch_output": 135.0
|
|
302
331
|
},
|
|
303
332
|
"openai/gpt-5.5": {
|
|
304
333
|
"input": 5.0,
|
|
@@ -306,13 +335,23 @@
|
|
|
306
335
|
"cache_read_input": 0.5,
|
|
307
336
|
"batch_input": 2.5,
|
|
308
337
|
"batch_output": 15.0,
|
|
309
|
-
"batch_cache_read_input": 0.25
|
|
338
|
+
"batch_cache_read_input": 0.25,
|
|
339
|
+
"_context_price_threshold_tokens": 272000,
|
|
340
|
+
"above_context_input": 10.0,
|
|
341
|
+
"above_context_output": 45.0,
|
|
342
|
+
"above_context_cache_read_input": 1.0,
|
|
343
|
+
"above_context_batch_input": 5.0,
|
|
344
|
+
"above_context_batch_output": 22.5,
|
|
345
|
+
"above_context_batch_cache_read_input": 0.5
|
|
310
346
|
},
|
|
311
347
|
"openai/gpt-5.5-pro": {
|
|
312
348
|
"input": 30.0,
|
|
313
349
|
"output": 180.0,
|
|
314
350
|
"batch_input": 15.0,
|
|
315
|
-
"batch_output": 90.0
|
|
351
|
+
"batch_output": 90.0,
|
|
352
|
+
"_context_price_threshold_tokens": 272000,
|
|
353
|
+
"above_context_input": 60.0,
|
|
354
|
+
"above_context_output": 270.0
|
|
316
355
|
},
|
|
317
356
|
"openai/o1": {
|
|
318
357
|
"input": 15.0,
|
|
@@ -352,6 +391,7 @@
|
|
|
352
391
|
"anthropic/claude-haiku-3-5": {
|
|
353
392
|
"input": 0.8,
|
|
354
393
|
"cache_write_input": 1.0,
|
|
394
|
+
"cache_write_1h_input": 1.6,
|
|
355
395
|
"cache_read_input": 0.08,
|
|
356
396
|
"output": 4.0,
|
|
357
397
|
"batch_input": 0.4,
|
|
@@ -360,6 +400,7 @@
|
|
|
360
400
|
"anthropic/claude-haiku-3": {
|
|
361
401
|
"input": 0.25,
|
|
362
402
|
"cache_write_input": 0.3,
|
|
403
|
+
"cache_write_1h_input": 0.5,
|
|
363
404
|
"cache_read_input": 0.03,
|
|
364
405
|
"output": 1.25,
|
|
365
406
|
"batch_input": 0.125,
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
|
|
4
|
+
module Pricing
|
|
5
|
+
Component = Data.define(:price_key, :token_key, :cost_key)
|
|
6
|
+
|
|
7
|
+
COMPONENTS = [
|
|
8
|
+
Component.new(
|
|
9
|
+
price_key: :input,
|
|
10
|
+
token_key: :input_tokens,
|
|
11
|
+
cost_key: :input_cost
|
|
12
|
+
),
|
|
13
|
+
Component.new(
|
|
14
|
+
price_key: :cache_read_input,
|
|
15
|
+
token_key: :cache_read_input_tokens,
|
|
16
|
+
cost_key: :cache_read_input_cost
|
|
17
|
+
),
|
|
18
|
+
Component.new(
|
|
19
|
+
price_key: :cache_write_input,
|
|
20
|
+
token_key: :cache_write_input_tokens,
|
|
21
|
+
cost_key: :cache_write_input_cost
|
|
22
|
+
),
|
|
23
|
+
Component.new(
|
|
24
|
+
price_key: :cache_write_1h_input,
|
|
25
|
+
token_key: :cache_write_1h_input_tokens,
|
|
26
|
+
cost_key: :cache_write_1h_input_cost
|
|
27
|
+
),
|
|
28
|
+
Component.new(
|
|
29
|
+
price_key: :output,
|
|
30
|
+
token_key: :output_tokens,
|
|
31
|
+
cost_key: :output_cost
|
|
32
|
+
)
|
|
33
|
+
].freeze
|
|
34
|
+
|
|
35
|
+
COST_KEYS = (COMPONENTS.map(&:cost_key) + %i[total_cost]).freeze
|
|
36
|
+
end
|
|
37
|
+
end
|