llm_cost_tracker 0.7.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -0
- data/CHANGELOG.md +72 -1
- data/README.md +58 -221
- data/app/assets/llm_cost_tracker/application.css +218 -41
- data/app/controllers/llm_cost_tracker/application_controller.rb +30 -17
- data/app/controllers/llm_cost_tracker/assets_controller.rb +11 -1
- data/app/controllers/llm_cost_tracker/calls_controller.rb +19 -14
- data/app/controllers/llm_cost_tracker/data_quality_controller.rb +10 -2
- data/app/helpers/llm_cost_tracker/application_helper.rb +11 -24
- data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +3 -21
- data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +4 -4
- data/app/helpers/llm_cost_tracker/dashboard_query_helper.rb +1 -1
- data/app/helpers/llm_cost_tracker/token_usage_helper.rb +20 -7
- data/app/models/llm_cost_tracker/call.rb +169 -0
- data/app/models/llm_cost_tracker/call_line_item.rb +22 -0
- data/app/models/llm_cost_tracker/call_rollup.rb +9 -0
- data/app/models/llm_cost_tracker/call_tag.rb +16 -0
- data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +13 -0
- data/app/models/llm_cost_tracker/ingestion/lease.rb +1 -1
- data/app/models/llm_cost_tracker/provider_invoice.rb +9 -0
- data/app/services/llm_cost_tracker/dashboard/data_quality.rb +125 -34
- data/app/services/llm_cost_tracker/dashboard/date_range.rb +1 -1
- data/app/services/llm_cost_tracker/dashboard/filter.rb +2 -2
- data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +74 -21
- data/app/services/llm_cost_tracker/dashboard/pagination.rb +6 -4
- data/app/services/llm_cost_tracker/dashboard/params.rb +8 -2
- data/app/services/llm_cost_tracker/dashboard/provider_breakdown.rb +1 -1
- data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +4 -3
- data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +42 -9
- data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +14 -37
- data/app/services/llm_cost_tracker/dashboard/time_series.rb +1 -1
- data/app/services/llm_cost_tracker/dashboard/top_models.rb +1 -1
- data/app/views/llm_cost_tracker/calls/index.html.erb +33 -75
- data/app/views/llm_cost_tracker/calls/show.html.erb +62 -7
- data/app/views/llm_cost_tracker/dashboard/index.html.erb +9 -50
- data/app/views/llm_cost_tracker/data_quality/index.html.erb +103 -126
- data/app/views/llm_cost_tracker/errors/database.html.erb +1 -1
- data/app/views/llm_cost_tracker/models/index.html.erb +18 -50
- data/app/views/llm_cost_tracker/shared/_filters.html.erb +63 -0
- data/app/views/llm_cost_tracker/shared/_sort.html.erb +13 -0
- data/app/views/llm_cost_tracker/shared/setup_required.html.erb +1 -1
- data/app/views/llm_cost_tracker/tags/index.html.erb +3 -34
- data/app/views/llm_cost_tracker/tags/show.html.erb +5 -37
- data/lib/llm_cost_tracker/billing/components.rb +53 -0
- data/lib/llm_cost_tracker/billing/components.yml +117 -0
- data/lib/llm_cost_tracker/billing/cost_status.rb +45 -0
- data/lib/llm_cost_tracker/billing/line_item.rb +189 -0
- data/lib/llm_cost_tracker/budget.rb +23 -35
- data/lib/llm_cost_tracker/capture/stream_collector.rb +47 -33
- data/lib/llm_cost_tracker/configuration.rb +36 -19
- data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +54 -0
- data/lib/llm_cost_tracker/doctor/ingestion_check.rb +24 -32
- data/lib/llm_cost_tracker/doctor/legacy_audit_check.rb +36 -0
- data/lib/llm_cost_tracker/doctor/legacy_billing_status_check.rb +22 -0
- data/lib/llm_cost_tracker/doctor/price_check.rb +2 -2
- data/lib/llm_cost_tracker/doctor/pricing_snapshot_drift_check.rb +85 -0
- data/lib/llm_cost_tracker/doctor/probe.rb +17 -0
- data/lib/llm_cost_tracker/doctor/schema_check.rb +31 -0
- data/lib/llm_cost_tracker/doctor.rb +43 -45
- data/lib/llm_cost_tracker/errors.rb +5 -19
- data/lib/llm_cost_tracker/event.rb +10 -2
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +4 -2
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +2 -6
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +157 -0
- data/lib/llm_cost_tracker/ingestion/batch.rb +11 -12
- data/lib/llm_cost_tracker/ingestion/inbox.rb +39 -23
- data/lib/llm_cost_tracker/ingestion/worker.rb +14 -5
- data/lib/llm_cost_tracker/ingestion.rb +28 -22
- data/lib/llm_cost_tracker/integrations/anthropic.rb +45 -38
- data/lib/llm_cost_tracker/integrations/base.rb +36 -29
- data/lib/llm_cost_tracker/integrations/openai.rb +85 -40
- data/lib/llm_cost_tracker/integrations/ruby_llm.rb +5 -5
- data/lib/llm_cost_tracker/integrations.rb +2 -2
- data/lib/llm_cost_tracker/ledger/period/totals.rb +12 -9
- data/lib/llm_cost_tracker/ledger/period.rb +5 -5
- data/lib/llm_cost_tracker/ledger/rollups/upsert_sql.rb +4 -10
- data/lib/llm_cost_tracker/ledger/rollups.rb +76 -25
- data/lib/llm_cost_tracker/ledger/schema/adapter.rb +18 -0
- data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +50 -0
- data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +37 -0
- data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +26 -0
- data/lib/llm_cost_tracker/ledger/schema/calls.rb +34 -23
- data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +57 -0
- data/lib/llm_cost_tracker/ledger/store.rb +110 -18
- data/lib/llm_cost_tracker/ledger/tags/query.rb +5 -11
- data/lib/llm_cost_tracker/ledger/tags/sql.rb +27 -14
- data/lib/llm_cost_tracker/ledger.rb +4 -2
- data/lib/llm_cost_tracker/logging.rb +2 -5
- data/lib/llm_cost_tracker/middleware/faraday.rb +7 -6
- data/lib/llm_cost_tracker/parsers/anthropic.rb +52 -7
- data/lib/llm_cost_tracker/parsers/base.rb +8 -3
- data/lib/llm_cost_tracker/parsers/gemini.rb +101 -15
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +10 -2
- data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +87 -0
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +48 -21
- data/lib/llm_cost_tracker/parsers/sse.rb +1 -1
- data/lib/llm_cost_tracker/parsers.rb +1 -1
- data/lib/llm_cost_tracker/prices.json +105 -20
- data/lib/llm_cost_tracker/pricing/effective_prices.rb +57 -19
- data/lib/llm_cost_tracker/pricing/explainer.rb +4 -5
- data/lib/llm_cost_tracker/pricing/lookup.rb +38 -34
- data/lib/llm_cost_tracker/pricing/registry.rb +65 -45
- data/lib/llm_cost_tracker/pricing/service_charges.rb +204 -0
- data/lib/llm_cost_tracker/pricing/sync/fetcher.rb +26 -17
- data/lib/llm_cost_tracker/pricing/sync/registry_diff.rb +6 -15
- data/lib/llm_cost_tracker/pricing/sync.rb +57 -10
- data/lib/llm_cost_tracker/pricing/sync_change_printer.rb +32 -0
- data/lib/llm_cost_tracker/pricing.rb +190 -26
- data/lib/llm_cost_tracker/railtie.rb +0 -8
- data/lib/llm_cost_tracker/report/data.rb +16 -8
- data/lib/llm_cost_tracker/report.rb +0 -4
- data/lib/llm_cost_tracker/retention.rb +8 -8
- data/lib/llm_cost_tracker/tags/context.rb +2 -4
- data/lib/llm_cost_tracker/tags/key.rb +4 -0
- data/lib/llm_cost_tracker/tags/sanitizer.rb +12 -17
- data/lib/llm_cost_tracker/timing.rb +15 -0
- data/lib/llm_cost_tracker/token_usage.rb +56 -42
- data/lib/llm_cost_tracker/tracker.rb +67 -24
- data/lib/llm_cost_tracker/usage_capture.rb +29 -8
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +36 -35
- data/lib/tasks/llm_cost_tracker.rake +22 -17
- metadata +36 -41
- data/app/models/llm_cost_tracker/ingestion/event.rb +0 -13
- data/app/models/llm_cost_tracker/ledger/call.rb +0 -45
- data/app/models/llm_cost_tracker/ledger/call_metrics.rb +0 -66
- data/app/models/llm_cost_tracker/ledger/period/grouping.rb +0 -71
- data/app/models/llm_cost_tracker/ledger/period/total.rb +0 -13
- data/app/models/llm_cost_tracker/ledger/tags/accessors.rb +0 -19
- data/lib/llm_cost_tracker/configuration/instrumentation.rb +0 -33
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_ingestion_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_period_totals_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_provider_response_id_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_streaming_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_token_usage_generator.rb +0 -42
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_ingestion_to_llm_cost_tracker.rb.erb +0 -33
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +0 -9
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +0 -104
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_provider_response_id_to_llm_api_calls.rb.erb +0 -15
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_streaming_to_llm_api_calls.rb.erb +0 -21
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_token_usage_to_llm_api_calls.rb.erb +0 -22
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +0 -83
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +0 -26
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +0 -44
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +0 -29
- data/lib/llm_cost_tracker/ledger/rollups/batch.rb +0 -43
- data/lib/llm_cost_tracker/ledger/schema/period_totals.rb +0 -32
- data/lib/llm_cost_tracker/pricing/components.rb +0 -37
- data/lib/llm_cost_tracker/pricing/sync/registry_loader.rb +0 -63
|
@@ -1,30 +1,43 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "../schema/adapter"
|
|
4
3
|
require_relative "../../tags/key"
|
|
5
4
|
|
|
6
5
|
module LlmCostTracker
|
|
7
6
|
module Ledger
|
|
8
7
|
module Tags
|
|
9
8
|
module Sql
|
|
9
|
+
UNTAGGED_LABEL = "(untagged)"
|
|
10
|
+
|
|
10
11
|
class << self
|
|
11
|
-
def
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
12
|
+
def join_relation(scope, key)
|
|
13
|
+
validated_key = LlmCostTracker::Tags::Key.validate!(key)
|
|
14
|
+
connection = scope.connection
|
|
15
|
+
join = "LEFT OUTER JOIN #{call_tag_table} ON " \
|
|
16
|
+
"#{call_tag_table}.llm_cost_tracker_call_id = #{scope.quoted_table_name}.id AND " \
|
|
17
|
+
"#{call_tag_table}.#{connection.quote_column_name('key')} = #{connection.quote(validated_key)}"
|
|
18
|
+
scope.joins(join)
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def value_arel
|
|
22
|
+
Arel.sql("#{call_tag_table}.#{quote_column('value')}")
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def label_sql(connection)
|
|
26
|
+
"COALESCE(NULLIF(#{raw_value_sql(connection)}, ''), #{connection.quote(UNTAGGED_LABEL)})"
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def raw_value_sql(connection)
|
|
30
|
+
"#{call_tag_table}.#{connection.quote_column_name('value')}"
|
|
22
31
|
end
|
|
23
32
|
|
|
24
33
|
private
|
|
25
34
|
|
|
26
|
-
def
|
|
27
|
-
|
|
35
|
+
def call_tag_table
|
|
36
|
+
LlmCostTracker::CallTag.quoted_table_name
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def quote_column(name)
|
|
40
|
+
LlmCostTracker::CallTag.connection.quote_column_name(name)
|
|
28
41
|
end
|
|
29
42
|
end
|
|
30
43
|
end
|
|
@@ -2,11 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "ledger/schema/adapter"
|
|
4
4
|
require_relative "ledger/schema/calls"
|
|
5
|
-
require_relative "ledger/schema/
|
|
5
|
+
require_relative "ledger/schema/call_rollups"
|
|
6
|
+
require_relative "ledger/schema/call_line_items"
|
|
7
|
+
require_relative "ledger/schema/call_tags"
|
|
8
|
+
require_relative "ledger/schema/provider_invoices"
|
|
6
9
|
require_relative "ledger/tags/query"
|
|
7
10
|
require_relative "ledger/tags/sql"
|
|
8
11
|
require_relative "ledger/period"
|
|
9
|
-
require_relative "ledger/rollups/batch"
|
|
10
12
|
require_relative "ledger/rollups/upsert_sql"
|
|
11
13
|
require_relative "ledger/rollups"
|
|
12
14
|
require_relative "ledger/store"
|
|
@@ -20,12 +20,9 @@ module LlmCostTracker
|
|
|
20
20
|
def log(level, message)
|
|
21
21
|
message = prefixed(message)
|
|
22
22
|
logger = Rails.logger
|
|
23
|
+
return Kernel.warn(message) unless logger
|
|
23
24
|
|
|
24
|
-
|
|
25
|
-
logger.try(level, message)
|
|
26
|
-
else
|
|
27
|
-
Kernel.warn(message)
|
|
28
|
-
end
|
|
25
|
+
logger.public_send(level, message)
|
|
29
26
|
end
|
|
30
27
|
|
|
31
28
|
private
|
|
@@ -6,6 +6,7 @@ require "uri"
|
|
|
6
6
|
|
|
7
7
|
require_relative "../logging"
|
|
8
8
|
require_relative "../capture/stream"
|
|
9
|
+
require_relative "../timing"
|
|
9
10
|
|
|
10
11
|
module LlmCostTracker
|
|
11
12
|
module Middleware
|
|
@@ -19,14 +20,14 @@ module LlmCostTracker
|
|
|
19
20
|
return @app.call(request_env) unless LlmCostTracker.configuration.enabled
|
|
20
21
|
|
|
21
22
|
request_url = request_env.url.to_s
|
|
22
|
-
request_body = read_body(request_env.body)
|
|
23
|
+
request_body = read_body(request_env.body)
|
|
23
24
|
parser = Parsers.find_for(request_url)
|
|
24
25
|
streaming = parser&.streaming_request?(request_url, request_body)
|
|
25
26
|
stream_buffer = install_stream_tap(request_env) if streaming
|
|
26
27
|
|
|
27
28
|
Tracker.enforce_budget! if parser
|
|
28
29
|
context_tags, metadata = tag_snapshot(request_env) if parser
|
|
29
|
-
started_at =
|
|
30
|
+
started_at = LlmCostTracker::Timing.now_monotonic
|
|
30
31
|
|
|
31
32
|
@app.call(request_env).on_complete do |response_env|
|
|
32
33
|
process(
|
|
@@ -34,7 +35,7 @@ module LlmCostTracker
|
|
|
34
35
|
request_url: request_url,
|
|
35
36
|
request_body: request_body,
|
|
36
37
|
response_env: response_env,
|
|
37
|
-
latency_ms:
|
|
38
|
+
latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
|
|
38
39
|
streaming: streaming,
|
|
39
40
|
stream_buffer: stream_buffer,
|
|
40
41
|
context_tags: context_tags,
|
|
@@ -135,7 +136,7 @@ module LlmCostTracker
|
|
|
135
136
|
end
|
|
136
137
|
|
|
137
138
|
def install_stream_tap(request_env)
|
|
138
|
-
request = request_env.
|
|
139
|
+
request = request_env.request
|
|
139
140
|
return nil unless request
|
|
140
141
|
|
|
141
142
|
original = request.on_data
|
|
@@ -204,8 +205,8 @@ module LlmCostTracker
|
|
|
204
205
|
uri = URI.parse(value.to_s)
|
|
205
206
|
uri.query = nil
|
|
206
207
|
uri.fragment = nil
|
|
207
|
-
uri.
|
|
208
|
-
uri.
|
|
208
|
+
uri.user = nil
|
|
209
|
+
uri.password = nil
|
|
209
210
|
uri.to_s
|
|
210
211
|
rescue URI::InvalidURIError
|
|
211
212
|
value.to_s.split("?", 2).first
|
|
@@ -31,7 +31,8 @@ module LlmCostTracker
|
|
|
31
31
|
pricing_mode: pricing_mode(request: request, response: response, usage: usage),
|
|
32
32
|
model: response["model"] || request["model"],
|
|
33
33
|
token_usage: token_usage(usage: usage, cache_read: cache_read),
|
|
34
|
-
usage_source: :response
|
|
34
|
+
usage_source: :response,
|
|
35
|
+
service_line_items: service_line_items(usage)
|
|
35
36
|
)
|
|
36
37
|
end
|
|
37
38
|
|
|
@@ -87,7 +88,39 @@ module LlmCostTracker
|
|
|
87
88
|
model: model,
|
|
88
89
|
token_usage: token_usage(usage: usage, cache_read: cache_read),
|
|
89
90
|
stream: true,
|
|
90
|
-
usage_source: :stream_final
|
|
91
|
+
usage_source: :stream_final,
|
|
92
|
+
service_line_items: service_line_items(usage)
|
|
93
|
+
)
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def service_line_items(usage)
|
|
97
|
+
server_tool_use = usage["server_tool_use"]
|
|
98
|
+
return [] unless server_tool_use.is_a?(Hash)
|
|
99
|
+
|
|
100
|
+
[
|
|
101
|
+
service_line_item(
|
|
102
|
+
component_key: :web_search_request,
|
|
103
|
+
quantity: server_tool_use["web_search_requests"],
|
|
104
|
+
provider_field: "usage.server_tool_use.web_search_requests"
|
|
105
|
+
),
|
|
106
|
+
service_line_item(
|
|
107
|
+
component_key: :code_execution_request,
|
|
108
|
+
quantity: server_tool_use["code_execution_requests"],
|
|
109
|
+
provider_field: "usage.server_tool_use.code_execution_requests"
|
|
110
|
+
)
|
|
111
|
+
].compact
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def service_line_item(component_key:, quantity:, provider_field:)
|
|
115
|
+
quantity = quantity.to_i
|
|
116
|
+
return if quantity.zero?
|
|
117
|
+
|
|
118
|
+
Billing::LineItem.build(
|
|
119
|
+
component_key: component_key,
|
|
120
|
+
quantity: quantity,
|
|
121
|
+
cost_status: Billing::CostStatus::UNKNOWN,
|
|
122
|
+
pricing_basis: :provider_usage,
|
|
123
|
+
provider_field: provider_field
|
|
91
124
|
)
|
|
92
125
|
end
|
|
93
126
|
|
|
@@ -97,22 +130,34 @@ module LlmCostTracker
|
|
|
97
130
|
cache_creation = usage["cache_creation"]
|
|
98
131
|
if cache_creation.is_a?(Hash)
|
|
99
132
|
cache_write = cache_creation["ephemeral_5m_input_tokens"].to_i
|
|
100
|
-
|
|
133
|
+
cache_write_extended = cache_creation["ephemeral_1h_input_tokens"].to_i
|
|
101
134
|
else
|
|
135
|
+
warn_unexpected_cache_creation(cache_creation, usage)
|
|
102
136
|
cache_write = usage["cache_creation_input_tokens"].to_i
|
|
103
|
-
|
|
137
|
+
cache_write_extended = 0
|
|
104
138
|
end
|
|
139
|
+
hidden_output = (
|
|
140
|
+
usage["thinking_tokens"] || usage["thinking_output_tokens"] ||
|
|
141
|
+
usage.dig("output_tokens_details", "reasoning_tokens")
|
|
142
|
+
).to_i
|
|
105
143
|
|
|
106
144
|
TokenUsage.build(
|
|
107
145
|
input_tokens: input,
|
|
108
146
|
output_tokens: output,
|
|
109
|
-
total_tokens: input + output + cache_read + cache_write +
|
|
110
|
-
cache_read_input_tokens:
|
|
147
|
+
total_tokens: input + output + cache_read + cache_write + cache_write_extended,
|
|
148
|
+
cache_read_input_tokens: cache_read,
|
|
111
149
|
cache_write_input_tokens: cache_write,
|
|
112
|
-
|
|
150
|
+
cache_write_extended_input_tokens: cache_write_extended,
|
|
151
|
+
hidden_output_tokens: hidden_output
|
|
113
152
|
)
|
|
114
153
|
end
|
|
115
154
|
|
|
155
|
+
def warn_unexpected_cache_creation(cache_creation, usage)
|
|
156
|
+
return if cache_creation.nil? || usage.key?("cache_creation_input_tokens")
|
|
157
|
+
|
|
158
|
+
Logging.warn("Anthropic usage.cache_creation has unexpected shape: #{cache_creation.class}")
|
|
159
|
+
end
|
|
160
|
+
|
|
116
161
|
def pricing_mode(request:, response:, usage:)
|
|
117
162
|
modes = []
|
|
118
163
|
speed = usage&.fetch("speed", nil) || response&.fetch("speed", nil) || request["speed"]
|
|
@@ -59,7 +59,10 @@ module LlmCostTracker
|
|
|
59
59
|
)
|
|
60
60
|
extra_match = block_given? ? yield(uri) : true
|
|
61
61
|
|
|
62
|
-
host_match && path_match
|
|
62
|
+
next false unless host_match && path_match
|
|
63
|
+
next false unless extra_match
|
|
64
|
+
|
|
65
|
+
true
|
|
63
66
|
end
|
|
64
67
|
end
|
|
65
68
|
|
|
@@ -100,7 +103,8 @@ module LlmCostTracker
|
|
|
100
103
|
nil
|
|
101
104
|
end
|
|
102
105
|
|
|
103
|
-
def build_unknown_stream_usage(provider:, model:, provider_response_id:, pricing_mode: nil
|
|
106
|
+
def build_unknown_stream_usage(provider:, model:, provider_response_id:, pricing_mode: nil,
|
|
107
|
+
service_line_items: nil)
|
|
104
108
|
UsageCapture.build(
|
|
105
109
|
provider: provider,
|
|
106
110
|
provider_response_id: provider_response_id,
|
|
@@ -108,7 +112,8 @@ module LlmCostTracker
|
|
|
108
112
|
model: model || UsageCapture::UNKNOWN_MODEL,
|
|
109
113
|
token_usage: TokenUsage.build(input_tokens: 0, output_tokens: 0, total_tokens: 0),
|
|
110
114
|
stream: true,
|
|
111
|
-
usage_source: :unknown
|
|
115
|
+
usage_source: :unknown,
|
|
116
|
+
service_line_items: service_line_items
|
|
112
117
|
)
|
|
113
118
|
end
|
|
114
119
|
end
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "../billing/line_item"
|
|
3
4
|
require_relative "base"
|
|
4
5
|
|
|
5
6
|
module LlmCostTracker
|
|
@@ -8,6 +9,7 @@ module LlmCostTracker
|
|
|
8
9
|
HOSTS = %w[generativelanguage.googleapis.com].freeze
|
|
9
10
|
TRACKED_PATH_PATTERN = %r{/models/[^/:]+:(?:generateContent|streamGenerateContent)\z}
|
|
10
11
|
STREAM_PATH_PATTERN = /:streamGenerateContent\z/
|
|
12
|
+
PER_QUERY_GROUNDING_MODEL_PATTERN = /\bgemini-(?:[3-9]|[1-9]\d)\b/i
|
|
11
13
|
|
|
12
14
|
def match?(url)
|
|
13
15
|
match_uri?(url, hosts: HOSTS, path_pattern: TRACKED_PATH_PATTERN)
|
|
@@ -31,12 +33,14 @@ module LlmCostTracker
|
|
|
31
33
|
return nil unless usage
|
|
32
34
|
|
|
33
35
|
request = safe_json_parse(request_body)
|
|
36
|
+
model = extract_model_from_url(request_url)
|
|
34
37
|
build_usage_capture(
|
|
35
38
|
request_url: request_url,
|
|
36
39
|
usage: usage,
|
|
37
40
|
usage_source: :response,
|
|
38
41
|
provider_response_id: response["responseId"],
|
|
39
|
-
pricing_mode: pricing_mode(request: request, response_headers: response_headers)
|
|
42
|
+
pricing_mode: pricing_mode(request: request, response_headers: response_headers),
|
|
43
|
+
service_line_items: grounding_line_items_for_response(response, model: model)
|
|
40
44
|
)
|
|
41
45
|
end
|
|
42
46
|
|
|
@@ -48,6 +52,7 @@ module LlmCostTracker
|
|
|
48
52
|
model = extract_model_from_url(request_url)
|
|
49
53
|
response_id = stream_response_id(events)
|
|
50
54
|
mode = pricing_mode(request: request, response_headers: response_headers)
|
|
55
|
+
service_line_items = grounding_line_items_for_stream(events, model: model)
|
|
51
56
|
|
|
52
57
|
if usage
|
|
53
58
|
build_usage_capture(
|
|
@@ -56,14 +61,16 @@ module LlmCostTracker
|
|
|
56
61
|
stream: true,
|
|
57
62
|
usage_source: :stream_final,
|
|
58
63
|
provider_response_id: response_id,
|
|
59
|
-
pricing_mode: mode
|
|
64
|
+
pricing_mode: mode,
|
|
65
|
+
service_line_items: service_line_items
|
|
60
66
|
)
|
|
61
67
|
else
|
|
62
68
|
build_unknown_stream_usage(
|
|
63
69
|
provider: "gemini",
|
|
64
70
|
model: model,
|
|
65
71
|
provider_response_id: response_id,
|
|
66
|
-
pricing_mode: mode
|
|
72
|
+
pricing_mode: mode,
|
|
73
|
+
service_line_items: service_line_items
|
|
67
74
|
)
|
|
68
75
|
end
|
|
69
76
|
end
|
|
@@ -71,24 +78,30 @@ module LlmCostTracker
|
|
|
71
78
|
private
|
|
72
79
|
|
|
73
80
|
def build_usage_capture(request_url:, usage:, usage_source:, stream: false, provider_response_id: nil,
|
|
74
|
-
pricing_mode: nil)
|
|
81
|
+
pricing_mode: nil, service_line_items: nil)
|
|
75
82
|
cache_read = usage["cachedContentTokenCount"].to_i
|
|
76
83
|
tool_use_prompt = usage["toolUsePromptTokenCount"].to_i
|
|
84
|
+
audio_input = audio_input_tokens(usage)
|
|
85
|
+
audio_output = audio_output_tokens(usage)
|
|
77
86
|
|
|
78
87
|
UsageCapture.build(
|
|
79
88
|
provider: "gemini",
|
|
80
89
|
model: extract_model_from_url(request_url),
|
|
81
90
|
pricing_mode: pricing_mode,
|
|
82
91
|
token_usage: TokenUsage.build(
|
|
83
|
-
input_tokens:
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
92
|
+
input_tokens: regular_input_tokens(usage: usage, cache_read: cache_read, audio_input: audio_input) +
|
|
93
|
+
tool_use_prompt,
|
|
94
|
+
output_tokens: regular_output_tokens(usage: usage, audio_output: audio_output),
|
|
95
|
+
total_tokens: usage["totalTokenCount"],
|
|
96
|
+
cache_read_input_tokens: cache_read,
|
|
97
|
+
audio_input_tokens: audio_input,
|
|
98
|
+
audio_output_tokens: audio_output,
|
|
87
99
|
hidden_output_tokens: usage["thoughtsTokenCount"]
|
|
88
100
|
),
|
|
89
101
|
stream: stream,
|
|
90
102
|
usage_source: usage_source,
|
|
91
|
-
provider_response_id: provider_response_id
|
|
103
|
+
provider_response_id: provider_response_id,
|
|
104
|
+
service_line_items: service_line_items
|
|
92
105
|
)
|
|
93
106
|
end
|
|
94
107
|
|
|
@@ -100,14 +113,41 @@ module LlmCostTracker
|
|
|
100
113
|
end
|
|
101
114
|
|
|
102
115
|
def output_tokens(usage)
|
|
103
|
-
usage["candidatesTokenCount"].to_i + usage["thoughtsTokenCount"].to_i
|
|
116
|
+
(usage["candidatesTokenCount"] || usage["responseTokenCount"]).to_i + usage["thoughtsTokenCount"].to_i
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
def regular_input_tokens(usage:, cache_read:, audio_input:)
|
|
120
|
+
[usage["promptTokenCount"].to_i - cache_read - audio_input, 0].max
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def regular_output_tokens(usage:, audio_output:)
|
|
124
|
+
[output_tokens(usage) - audio_output, 0].max
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
def audio_input_tokens(usage)
|
|
128
|
+
prompt_audio = modality_tokens(usage["promptTokensDetails"] || usage["prompt_tokens_details"], "AUDIO")
|
|
129
|
+
cache_audio = modality_tokens(usage["cacheTokensDetails"] || usage["cache_tokens_details"], "AUDIO")
|
|
130
|
+
[prompt_audio - cache_audio, 0].max
|
|
104
131
|
end
|
|
105
132
|
|
|
106
|
-
def
|
|
107
|
-
|
|
108
|
-
|
|
133
|
+
def audio_output_tokens(usage)
|
|
134
|
+
modality_tokens(
|
|
135
|
+
usage["candidatesTokensDetails"] ||
|
|
136
|
+
usage["candidates_tokens_details"] ||
|
|
137
|
+
usage["responseTokensDetails"] ||
|
|
138
|
+
usage["response_tokens_details"],
|
|
139
|
+
"AUDIO"
|
|
140
|
+
)
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def modality_tokens(details, modality)
|
|
144
|
+
Array(details).sum do |detail|
|
|
145
|
+
next 0 unless detail.is_a?(Hash)
|
|
146
|
+
|
|
147
|
+
next 0 unless detail["modality"] == modality
|
|
109
148
|
|
|
110
|
-
|
|
149
|
+
(detail["tokenCount"] || detail["token_count"]).to_i
|
|
150
|
+
end
|
|
111
151
|
end
|
|
112
152
|
|
|
113
153
|
def stream_response_id(events)
|
|
@@ -133,12 +173,58 @@ module LlmCostTracker
|
|
|
133
173
|
request.dig("config", "service_tier") ||
|
|
134
174
|
request.dig("config", "serviceTier")
|
|
135
175
|
)
|
|
136
|
-
request_mode ==
|
|
176
|
+
request_mode == :flex ? request_mode : nil
|
|
137
177
|
end
|
|
138
178
|
|
|
139
179
|
def response_header(headers, name)
|
|
140
180
|
headers.to_h.find { |key, _value| key.to_s.downcase == name }&.last
|
|
141
181
|
end
|
|
182
|
+
|
|
183
|
+
def grounding_line_items_for_response(response, model:)
|
|
184
|
+
grounding_line_items(grounding_request_count(response["candidates"]), model: model)
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
def grounding_line_items_for_stream(events, model:)
|
|
188
|
+
quantity = find_event_value(events, reverse: true) do |data|
|
|
189
|
+
count = grounding_request_count(data["candidates"])
|
|
190
|
+
count if count.positive?
|
|
191
|
+
end
|
|
192
|
+
grounding_line_items(quantity || 0, model: model)
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
def grounding_request_count(candidates)
|
|
196
|
+
Array(candidates).sum do |candidate|
|
|
197
|
+
next 0 unless candidate.is_a?(Hash)
|
|
198
|
+
|
|
199
|
+
metadata = candidate["groundingMetadata"] || candidate["grounding_metadata"] || {}
|
|
200
|
+
queries = metadata["webSearchQueries"] || metadata["web_search_queries"] || []
|
|
201
|
+
Array(queries).size
|
|
202
|
+
end
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
def grounding_line_items(query_count, model:)
|
|
206
|
+
return [] unless query_count.positive?
|
|
207
|
+
|
|
208
|
+
billed_quantity = grounding_billed_quantity(query_count, model: model)
|
|
209
|
+
[
|
|
210
|
+
Billing::LineItem.build(
|
|
211
|
+
component_key: :grounding_request,
|
|
212
|
+
quantity: billed_quantity,
|
|
213
|
+
cost_status: Billing::CostStatus::UNKNOWN,
|
|
214
|
+
pricing_basis: :provider_usage,
|
|
215
|
+
provider_field: "response.candidates.groundingMetadata.webSearchQueries",
|
|
216
|
+
details: { web_search_queries: query_count }
|
|
217
|
+
)
|
|
218
|
+
]
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
def grounding_billed_quantity(query_count, model:)
|
|
222
|
+
per_query_billing?(model) ? query_count : 1
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
def per_query_billing?(model)
|
|
226
|
+
model.to_s.match?(PER_QUERY_GROUNDING_MODEL_PATTERN)
|
|
227
|
+
end
|
|
142
228
|
end
|
|
143
229
|
end
|
|
144
230
|
end
|
|
@@ -15,10 +15,18 @@ module LlmCostTracker
|
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
def provider_names
|
|
18
|
-
|
|
18
|
+
providers = LlmCostTracker.configuration.openai_compatible_providers
|
|
19
|
+
cached = @provider_names
|
|
20
|
+
return cached if cached && @provider_names_providers.equal?(providers)
|
|
21
|
+
|
|
22
|
+
names = [
|
|
19
23
|
"openai_compatible",
|
|
20
|
-
*
|
|
24
|
+
*providers.each_value.map { |provider| provider.to_s.downcase }
|
|
21
25
|
].uniq.freeze
|
|
26
|
+
return names unless providers.frozen?
|
|
27
|
+
|
|
28
|
+
@provider_names_providers = providers
|
|
29
|
+
@provider_names = names
|
|
22
30
|
end
|
|
23
31
|
|
|
24
32
|
def parse(request_url:, request_body:, response_status:, response_body:, **)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../billing/line_item"
|
|
4
|
+
|
|
5
|
+
module LlmCostTracker
|
|
6
|
+
module Parsers
|
|
7
|
+
module OpenaiServiceCharges
|
|
8
|
+
RESPONSE_OUTPUT_COMPONENTS = {
|
|
9
|
+
"web_search_call" => :web_search_request,
|
|
10
|
+
"file_search_call" => :file_search_call,
|
|
11
|
+
"code_interpreter_call" => :container_session
|
|
12
|
+
}.freeze
|
|
13
|
+
|
|
14
|
+
module_function
|
|
15
|
+
|
|
16
|
+
def line_items_from_output(output_items)
|
|
17
|
+
deduped = {}
|
|
18
|
+
Array(output_items).each { |item| store_output_item(deduped, item) }
|
|
19
|
+
deduped.values.filter_map { |item| build_line_item(item) }
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def billable?(item)
|
|
23
|
+
return false unless item.is_a?(Hash)
|
|
24
|
+
|
|
25
|
+
component = RESPONSE_OUTPUT_COMPONENTS[item["type"]]
|
|
26
|
+
return false unless component
|
|
27
|
+
return true unless component == :web_search_request
|
|
28
|
+
|
|
29
|
+
action_type = item.dig("action", "type")
|
|
30
|
+
action_type.nil? || action_type == "search"
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def store_output_item(output_items, item)
|
|
34
|
+
return unless billable?(item)
|
|
35
|
+
|
|
36
|
+
component = RESPONSE_OUTPUT_COMPONENTS[item["type"]]
|
|
37
|
+
key = if component == :container_session && item["container_id"]
|
|
38
|
+
"#{component}:#{item['container_id']}"
|
|
39
|
+
else
|
|
40
|
+
item["id"] || "#{item['type']}:#{output_items.length}"
|
|
41
|
+
end
|
|
42
|
+
output_items[key] = item
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def build_line_item(item)
|
|
46
|
+
component_key = RESPONSE_OUTPUT_COMPONENTS[item["type"]]
|
|
47
|
+
return nil unless component_key
|
|
48
|
+
|
|
49
|
+
provider_item_id = if component_key == :container_session
|
|
50
|
+
item["container_id"] || item["id"]
|
|
51
|
+
else
|
|
52
|
+
item["id"]
|
|
53
|
+
end
|
|
54
|
+
Billing::LineItem.build(
|
|
55
|
+
component_key: component_key,
|
|
56
|
+
quantity: 1,
|
|
57
|
+
cost_status: Billing::CostStatus::UNKNOWN,
|
|
58
|
+
pricing_basis: :provider_usage,
|
|
59
|
+
provider_field: "response.output.#{item['type']}",
|
|
60
|
+
provider_item_id: provider_item_id,
|
|
61
|
+
details: line_item_details(item)
|
|
62
|
+
)
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def line_item_details(item)
|
|
66
|
+
{
|
|
67
|
+
"status" => item["status"],
|
|
68
|
+
"action_type" => item.dig("action", "type"),
|
|
69
|
+
"container_id" => item["container_id"]
|
|
70
|
+
}.compact
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def openai_service_line_items(response)
|
|
74
|
+
line_items_from_output(response["output"])
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def openai_stream_service_line_items(events)
|
|
78
|
+
output_items = []
|
|
79
|
+
each_event_data(events) do |data|
|
|
80
|
+
output_items.concat(Array(data.dig("response", "output")))
|
|
81
|
+
output_items << data["item"] if data["item"]
|
|
82
|
+
end
|
|
83
|
+
line_items_from_output(output_items)
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
end
|