llm_cost_tracker 0.7.3 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -0
- data/CHANGELOG.md +173 -0
- data/README.md +60 -220
- data/app/assets/llm_cost_tracker/application.css +282 -45
- data/app/controllers/llm_cost_tracker/application_controller.rb +25 -20
- data/app/controllers/llm_cost_tracker/assets_controller.rb +11 -1
- data/app/controllers/llm_cost_tracker/calls_controller.rb +22 -19
- data/app/controllers/llm_cost_tracker/data_quality_controller.rb +14 -2
- data/app/controllers/llm_cost_tracker/reconciliation_controller.rb +106 -0
- data/app/controllers/llm_cost_tracker/tags_controller.rb +15 -1
- data/app/helpers/llm_cost_tracker/application_helper.rb +18 -21
- data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +3 -21
- data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +4 -4
- data/app/helpers/llm_cost_tracker/dashboard_query_helper.rb +1 -1
- data/app/helpers/llm_cost_tracker/inline_style_helper.rb +28 -0
- data/app/helpers/llm_cost_tracker/reconciliation_helper.rb +13 -0
- data/app/helpers/llm_cost_tracker/token_usage_helper.rb +24 -7
- data/app/models/llm_cost_tracker/call.rb +166 -0
- data/app/models/llm_cost_tracker/call_line_item.rb +18 -0
- data/app/models/llm_cost_tracker/call_rollup.rb +6 -0
- data/app/models/llm_cost_tracker/call_tag.rb +12 -0
- data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +9 -0
- data/app/models/llm_cost_tracker/ingestion/lease.rb +0 -3
- data/app/models/llm_cost_tracker/provider_invoice.rb +13 -0
- data/app/models/llm_cost_tracker/provider_invoice_import.rb +24 -0
- data/app/services/llm_cost_tracker/dashboard/data_quality.rb +152 -32
- data/app/services/llm_cost_tracker/dashboard/date_range.rb +1 -1
- data/app/services/llm_cost_tracker/dashboard/filter.rb +8 -6
- data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +74 -21
- data/app/services/llm_cost_tracker/dashboard/pagination.rb +6 -4
- data/app/services/llm_cost_tracker/dashboard/params.rb +8 -2
- data/app/services/llm_cost_tracker/dashboard/provider_breakdown.rb +1 -1
- data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +4 -3
- data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +42 -9
- data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +14 -37
- data/app/services/llm_cost_tracker/dashboard/time_series.rb +1 -1
- data/app/services/llm_cost_tracker/dashboard/top_models.rb +1 -1
- data/app/views/layouts/llm_cost_tracker/application.html.erb +6 -1
- data/app/views/llm_cost_tracker/calls/index.html.erb +33 -75
- data/app/views/llm_cost_tracker/calls/show.html.erb +73 -33
- data/app/views/llm_cost_tracker/dashboard/index.html.erb +16 -57
- data/app/views/llm_cost_tracker/data_quality/index.html.erb +183 -167
- data/app/views/llm_cost_tracker/errors/database.html.erb +1 -1
- data/app/views/llm_cost_tracker/models/index.html.erb +18 -50
- data/app/views/llm_cost_tracker/reconciliation/index.html.erb +183 -0
- data/app/views/llm_cost_tracker/shared/_bar.html.erb +1 -1
- data/app/views/llm_cost_tracker/shared/_filters.html.erb +66 -0
- data/app/views/llm_cost_tracker/shared/_metric_stack.html.erb +1 -1
- data/app/views/llm_cost_tracker/shared/_sort.html.erb +13 -0
- data/app/views/llm_cost_tracker/shared/setup_required.html.erb +1 -1
- data/app/views/llm_cost_tracker/tags/index.html.erb +3 -34
- data/app/views/llm_cost_tracker/tags/show.html.erb +64 -36
- data/config/routes.rb +3 -2
- data/lib/llm_cost_tracker/billing/components.rb +95 -0
- data/lib/llm_cost_tracker/billing/components.yml +188 -0
- data/lib/llm_cost_tracker/billing/cost_status.rb +45 -0
- data/lib/llm_cost_tracker/billing/line_item.rb +189 -0
- data/lib/llm_cost_tracker/budget.rb +26 -36
- data/lib/llm_cost_tracker/capture/stream_collector.rb +125 -38
- data/lib/llm_cost_tracker/capture/stream_tracker.rb +40 -5
- data/lib/llm_cost_tracker/configuration.rb +86 -17
- data/lib/llm_cost_tracker/dashboard_setup_state.rb +109 -0
- data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +56 -0
- data/lib/llm_cost_tracker/doctor/ingestion_check.rb +48 -30
- data/lib/llm_cost_tracker/doctor/invoice_reconciliation_check.rb +164 -0
- data/lib/llm_cost_tracker/doctor/legacy_audit_check.rb +36 -0
- data/lib/llm_cost_tracker/doctor/legacy_billing_status_check.rb +22 -0
- data/lib/llm_cost_tracker/doctor/price_check.rb +2 -2
- data/lib/llm_cost_tracker/doctor/pricing_snapshot_drift_check.rb +85 -0
- data/lib/llm_cost_tracker/doctor/probe.rb +17 -0
- data/lib/llm_cost_tracker/doctor/schema_check.rb +34 -0
- data/lib/llm_cost_tracker/doctor.rb +111 -44
- data/lib/llm_cost_tracker/engine.rb +9 -0
- data/lib/llm_cost_tracker/errors.rb +5 -19
- data/lib/llm_cost_tracker/event.rb +11 -3
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/call_rollups_generator.rb +43 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/durable_ingestion_generator.rb +43 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +17 -5
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +2 -6
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/reconciliation_generator.rb +34 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_call_rollups.rb.erb +15 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +104 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_durable_ingestion.rb.erb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_reconciliation.rb.erb +55 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +28 -25
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_rollups_provider.rb.erb +20 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_tags_key_value_index.rb.erb +32 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_image_tokens.rb.erb +18 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_rollups_provider_generator.rb +38 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/{add_provider_response_id_generator.rb → upgrade_call_tags_key_value_index_generator.rb} +5 -4
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/{add_streaming_generator.rb → upgrade_image_tokens_generator.rb} +4 -4
- data/lib/llm_cost_tracker/ingestion/batch.rb +11 -12
- data/lib/llm_cost_tracker/ingestion/inbox.rb +39 -24
- data/lib/llm_cost_tracker/ingestion/inline.rb +22 -0
- data/lib/llm_cost_tracker/ingestion/worker.rb +24 -7
- data/lib/llm_cost_tracker/ingestion.rb +66 -22
- data/lib/llm_cost_tracker/integrations/anthropic.rb +68 -42
- data/lib/llm_cost_tracker/integrations/base.rb +56 -32
- data/lib/llm_cost_tracker/integrations/openai.rb +342 -63
- data/lib/llm_cost_tracker/integrations/ruby_llm.rb +110 -11
- data/lib/llm_cost_tracker/integrations.rb +21 -3
- data/lib/llm_cost_tracker/ledger/period/totals.rb +30 -11
- data/lib/llm_cost_tracker/ledger/period.rb +5 -5
- data/lib/llm_cost_tracker/ledger/rollups/upsert_sql.rb +2 -2
- data/lib/llm_cost_tracker/ledger/rollups.rb +90 -25
- data/lib/llm_cost_tracker/ledger/schema/adapter.rb +18 -0
- data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +79 -0
- data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +37 -0
- data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +41 -0
- data/lib/llm_cost_tracker/ledger/schema/calls.rb +36 -23
- data/lib/llm_cost_tracker/ledger/schema/ingestion_inbox_entries.rb +47 -0
- data/lib/llm_cost_tracker/ledger/schema/ingestion_leases.rb +42 -0
- data/lib/llm_cost_tracker/ledger/schema/provider_invoice_imports.rb +46 -0
- data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +57 -0
- data/lib/llm_cost_tracker/ledger/store.rb +103 -20
- data/lib/llm_cost_tracker/ledger/tags/encoding.rb +37 -0
- data/lib/llm_cost_tracker/ledger/tags/query.rb +6 -11
- data/lib/llm_cost_tracker/ledger/tags/sql.rb +27 -15
- data/lib/llm_cost_tracker/ledger.rb +5 -2
- data/lib/llm_cost_tracker/logging.rb +2 -5
- data/lib/llm_cost_tracker/masking.rb +39 -0
- data/lib/llm_cost_tracker/middleware/faraday.rb +95 -35
- data/lib/llm_cost_tracker/parsers/anthropic.rb +74 -14
- data/lib/llm_cost_tracker/parsers/base.rb +13 -4
- data/lib/llm_cost_tracker/parsers/gemini.rb +105 -15
- data/lib/llm_cost_tracker/parsers/openai.rb +16 -2
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +15 -3
- data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +126 -0
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +157 -59
- data/lib/llm_cost_tracker/parsers/sse.rb +1 -1
- data/lib/llm_cost_tracker/parsers.rb +1 -1
- data/lib/llm_cost_tracker/prices.json +198 -22
- data/lib/llm_cost_tracker/pricing/effective_prices.rb +28 -21
- data/lib/llm_cost_tracker/pricing/explainer.rb +4 -5
- data/lib/llm_cost_tracker/pricing/lookup.rb +73 -36
- data/lib/llm_cost_tracker/pricing/mode.rb +76 -0
- data/lib/llm_cost_tracker/pricing/registry.rb +67 -45
- data/lib/llm_cost_tracker/pricing/service_charges.rb +210 -0
- data/lib/llm_cost_tracker/pricing/sync/fetcher.rb +26 -17
- data/lib/llm_cost_tracker/pricing/sync/registry_diff.rb +6 -15
- data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +50 -1
- data/lib/llm_cost_tracker/pricing/sync.rb +59 -10
- data/lib/llm_cost_tracker/pricing/sync_change_printer.rb +32 -0
- data/lib/llm_cost_tracker/pricing.rb +220 -28
- data/lib/llm_cost_tracker/railtie.rb +6 -8
- data/lib/llm_cost_tracker/reconcile_tasks.rb +134 -0
- data/lib/llm_cost_tracker/reconciliation/diff.rb +428 -0
- data/lib/llm_cost_tracker/reconciliation/diff_result.rb +48 -0
- data/lib/llm_cost_tracker/reconciliation/import_result.rb +19 -0
- data/lib/llm_cost_tracker/reconciliation/importer.rb +253 -0
- data/lib/llm_cost_tracker/reconciliation/sources/anthropic_usage.rb +171 -0
- data/lib/llm_cost_tracker/reconciliation/sources/fingerprint.rb +20 -0
- data/lib/llm_cost_tracker/reconciliation/sources/openai_usage.rb +142 -0
- data/lib/llm_cost_tracker/reconciliation.rb +118 -0
- data/lib/llm_cost_tracker/report/data.rb +19 -8
- data/lib/llm_cost_tracker/report.rb +0 -4
- data/lib/llm_cost_tracker/retention.rb +22 -9
- data/lib/llm_cost_tracker/tags/context.rb +2 -5
- data/lib/llm_cost_tracker/tags/key.rb +4 -0
- data/lib/llm_cost_tracker/tags/sanitizer.rb +71 -20
- data/lib/llm_cost_tracker/timing.rb +15 -0
- data/lib/llm_cost_tracker/token_usage.rb +64 -42
- data/lib/llm_cost_tracker/tracker.rb +97 -27
- data/lib/llm_cost_tracker/usage_capture.rb +29 -8
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +45 -35
- data/lib/tasks/llm_cost_tracker.rake +45 -17
- metadata +71 -41
- data/app/models/llm_cost_tracker/ingestion/event.rb +0 -13
- data/app/models/llm_cost_tracker/ledger/call.rb +0 -45
- data/app/models/llm_cost_tracker/ledger/call_metrics.rb +0 -66
- data/app/models/llm_cost_tracker/ledger/period/grouping.rb +0 -71
- data/app/models/llm_cost_tracker/ledger/period/total.rb +0 -13
- data/app/models/llm_cost_tracker/ledger/tags/accessors.rb +0 -19
- data/lib/llm_cost_tracker/configuration/instrumentation.rb +0 -33
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_ingestion_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_period_totals_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_token_usage_generator.rb +0 -42
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_ingestion_to_llm_cost_tracker.rb.erb +0 -33
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +0 -9
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +0 -104
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_provider_response_id_to_llm_api_calls.rb.erb +0 -15
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_streaming_to_llm_api_calls.rb.erb +0 -21
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_token_usage_to_llm_api_calls.rb.erb +0 -22
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +0 -83
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +0 -26
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +0 -44
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +0 -29
- data/lib/llm_cost_tracker/ledger/rollups/batch.rb +0 -43
- data/lib/llm_cost_tracker/ledger/schema/period_totals.rb +0 -32
- data/lib/llm_cost_tracker/pricing/components.rb +0 -37
- data/lib/llm_cost_tracker/pricing/sync/registry_loader.rb +0 -63
|
@@ -15,10 +15,18 @@ module LlmCostTracker
|
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
def provider_names
|
|
18
|
-
|
|
18
|
+
providers = LlmCostTracker.configuration.openai_compatible_providers
|
|
19
|
+
cached = @provider_names
|
|
20
|
+
return cached if cached && @provider_names_providers.equal?(providers)
|
|
21
|
+
|
|
22
|
+
names = [
|
|
19
23
|
"openai_compatible",
|
|
20
|
-
*
|
|
24
|
+
*providers.each_value.map { |provider| provider.to_s.downcase }
|
|
21
25
|
].uniq.freeze
|
|
26
|
+
return names unless providers.frozen?
|
|
27
|
+
|
|
28
|
+
@provider_names_providers = providers
|
|
29
|
+
@provider_names = names
|
|
22
30
|
end
|
|
23
31
|
|
|
24
32
|
def parse(request_url:, request_body:, response_status:, response_body:, **)
|
|
@@ -39,13 +47,17 @@ module LlmCostTracker
|
|
|
39
47
|
)
|
|
40
48
|
end
|
|
41
49
|
|
|
42
|
-
|
|
50
|
+
def auto_enable_stream_usage?(request_url)
|
|
51
|
+
openai_chat_completions_url?(request_url)
|
|
52
|
+
end
|
|
43
53
|
|
|
44
54
|
def provider_for(request_url)
|
|
45
55
|
uri = parsed_uri(request_url)
|
|
46
56
|
provider_for_uri(uri) || "openai_compatible"
|
|
47
57
|
end
|
|
48
58
|
|
|
59
|
+
private
|
|
60
|
+
|
|
49
61
|
def provider_for_uri(uri)
|
|
50
62
|
return nil unless uri
|
|
51
63
|
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../billing/line_item"
|
|
4
|
+
|
|
5
|
+
module LlmCostTracker
|
|
6
|
+
module Parsers
|
|
7
|
+
module OpenaiServiceCharges
|
|
8
|
+
RESPONSE_OUTPUT_COMPONENTS = {
|
|
9
|
+
"web_search_call" => :web_search_request,
|
|
10
|
+
"file_search_call" => :file_search_call,
|
|
11
|
+
"code_interpreter_call" => :container_session,
|
|
12
|
+
"mcp_call" => :mcp_call
|
|
13
|
+
}.freeze
|
|
14
|
+
|
|
15
|
+
REASONING_MODEL_PATTERNS = [
|
|
16
|
+
/\Agpt-5(\b|[\d.-])/i,
|
|
17
|
+
/\Ao\d+(\b|[\d.-])/i
|
|
18
|
+
].freeze
|
|
19
|
+
NON_REASONING_GPT5_PATTERN = /\Agpt-5(?:\.\d+)?-chat\b/i
|
|
20
|
+
private_constant :NON_REASONING_GPT5_PATTERN
|
|
21
|
+
|
|
22
|
+
module_function
|
|
23
|
+
|
|
24
|
+
def line_items_from_output(output_items, request: nil, model: nil)
|
|
25
|
+
deduped = {}
|
|
26
|
+
Array(output_items).each { |item| store_output_item(deduped, item) }
|
|
27
|
+
deduped.values
|
|
28
|
+
.select { |item| billable?(item) }
|
|
29
|
+
.filter_map { |item| build_line_item(item, request: request, model: model) }
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def billable?(item)
|
|
33
|
+
return false unless item.is_a?(Hash)
|
|
34
|
+
|
|
35
|
+
component = RESPONSE_OUTPUT_COMPONENTS[item["type"]]
|
|
36
|
+
return false unless component
|
|
37
|
+
return true unless component == :web_search_request
|
|
38
|
+
|
|
39
|
+
action_type = item.dig("action", "type")
|
|
40
|
+
action_type.nil? || action_type == "search"
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def store_output_item(output_items, item)
|
|
44
|
+
return unless item.is_a?(Hash) && RESPONSE_OUTPUT_COMPONENTS.key?(item["type"])
|
|
45
|
+
|
|
46
|
+
component = RESPONSE_OUTPUT_COMPONENTS[item["type"]]
|
|
47
|
+
key = if component == :container_session && item["container_id"]
|
|
48
|
+
"#{component}:#{item['container_id']}"
|
|
49
|
+
else
|
|
50
|
+
item["id"] || "#{item['type']}:#{output_items.length}"
|
|
51
|
+
end
|
|
52
|
+
output_items[key] = item
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def build_line_item(item, request: nil, model: nil)
|
|
56
|
+
return nil unless item.is_a?(Hash)
|
|
57
|
+
|
|
58
|
+
component_key = component_key_for(item, request: request, model: model)
|
|
59
|
+
return nil unless component_key
|
|
60
|
+
|
|
61
|
+
provider_item_id = if component_key == :container_session
|
|
62
|
+
item["container_id"] || item["id"]
|
|
63
|
+
else
|
|
64
|
+
item["id"]
|
|
65
|
+
end
|
|
66
|
+
Billing::LineItem.build(
|
|
67
|
+
component_key: component_key,
|
|
68
|
+
quantity: 1,
|
|
69
|
+
cost_status: Billing::CostStatus::UNKNOWN,
|
|
70
|
+
pricing_basis: :provider_usage,
|
|
71
|
+
provider_field: "response.output.#{item['type']}",
|
|
72
|
+
provider_item_id: provider_item_id,
|
|
73
|
+
details: line_item_details(item)
|
|
74
|
+
)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def component_key_for(item, request:, model:)
|
|
78
|
+
component = RESPONSE_OUTPUT_COMPONENTS[item["type"]]
|
|
79
|
+
return component unless component == :web_search_request
|
|
80
|
+
return component unless web_search_preview_used?(request)
|
|
81
|
+
|
|
82
|
+
reasoning_model?(model) ? :web_search_preview_request_reasoning : :web_search_preview_request_non_reasoning
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def web_search_preview_used?(request)
|
|
86
|
+
tools = request && (request[:tools] || request["tools"])
|
|
87
|
+
return false unless tools.respond_to?(:each)
|
|
88
|
+
|
|
89
|
+
tools.any? do |tool|
|
|
90
|
+
type = tool.is_a?(Hash) ? (tool[:type] || tool["type"]) : tool
|
|
91
|
+
type.to_s.include?("web_search_preview")
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def reasoning_model?(model)
|
|
96
|
+
return false unless model
|
|
97
|
+
|
|
98
|
+
name = model.to_s.split("/", 2).last
|
|
99
|
+
return false if NON_REASONING_GPT5_PATTERN.match?(name)
|
|
100
|
+
|
|
101
|
+
REASONING_MODEL_PATTERNS.any? { |pattern| pattern.match?(name) }
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def line_item_details(item)
|
|
105
|
+
{
|
|
106
|
+
"status" => item["status"],
|
|
107
|
+
"action_type" => item.dig("action", "type"),
|
|
108
|
+
"container_id" => item["container_id"]
|
|
109
|
+
}.compact
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
def openai_service_line_items(response, request: nil)
|
|
113
|
+
line_items_from_output(response["output"], request: request, model: response["model"])
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
def openai_stream_service_line_items(events, request: nil, model: nil)
|
|
117
|
+
output_items = []
|
|
118
|
+
each_event_data(events) do |data|
|
|
119
|
+
output_items.concat(Array(data.dig("response", "output")))
|
|
120
|
+
output_items << data["item"] if data["item"]
|
|
121
|
+
end
|
|
122
|
+
line_items_from_output(output_items, request: request, model: model)
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
@@ -1,8 +1,33 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "openai_service_charges"
|
|
4
|
+
|
|
3
5
|
module LlmCostTracker
|
|
4
6
|
module Parsers
|
|
5
7
|
module OpenaiUsage
|
|
8
|
+
include OpenaiServiceCharges
|
|
9
|
+
|
|
10
|
+
OPENAI_DATA_RESIDENCY_HOST_PATTERN = /\A[a-z]{2,3}\.api\.openai\.com\z/
|
|
11
|
+
|
|
12
|
+
class << self
|
|
13
|
+
def combined_pricing_mode(host:, model:, service_tier:)
|
|
14
|
+
modes = [Pricing.normalize_mode(service_tier)]
|
|
15
|
+
modes << "data_residency" if regional_processing?(host: host, model: model)
|
|
16
|
+
modes = modes.compact.uniq
|
|
17
|
+
modes.empty? ? nil : modes.join("_")
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def regional_processing?(host:, model:)
|
|
21
|
+
host.to_s.downcase.match?(OPENAI_DATA_RESIDENCY_HOST_PATTERN) && data_residency_model?(model)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def data_residency_model?(model)
|
|
25
|
+
model.to_s.match?(
|
|
26
|
+
/\Agpt-5\.(?:4|5)(?:-(?:mini|nano|pro|codex(?:-mini|-max)?))?(?:-\d{4}-\d{2}-\d{2})?\z/
|
|
27
|
+
)
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
6
31
|
private
|
|
7
32
|
|
|
8
33
|
def parse_openai_usage(request_url:, request_body:, response_status:, response_body:)
|
|
@@ -26,8 +51,9 @@ module LlmCostTracker
|
|
|
26
51
|
service_tier: response["service_tier"] || request["service_tier"]
|
|
27
52
|
),
|
|
28
53
|
model: model,
|
|
29
|
-
token_usage: token_usage(usage: usage, cache_read: cache_read),
|
|
30
|
-
usage_source: :response
|
|
54
|
+
token_usage: token_usage(usage: usage, cache_read: cache_read, model: model),
|
|
55
|
+
usage_source: :response,
|
|
56
|
+
service_line_items: openai_service_line_items(response, request: request)
|
|
31
57
|
)
|
|
32
58
|
end
|
|
33
59
|
|
|
@@ -35,99 +61,171 @@ module LlmCostTracker
|
|
|
35
61
|
return nil unless response_status == 200
|
|
36
62
|
|
|
37
63
|
request = safe_json_parse(request_body)
|
|
38
|
-
model =
|
|
39
|
-
find_event_value(events) { |data| data["model"] || data.dig("response", "model") } || request["model"]
|
|
40
64
|
usage = detect_stream_usage(events)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
65
|
+
context = stream_capture_context(events: events, request: request, request_url: request_url)
|
|
66
|
+
|
|
67
|
+
return build_known_stream_usage(usage: usage, **context) if usage
|
|
68
|
+
|
|
69
|
+
warn_missing_stream_usage(request_url: request_url, request: request)
|
|
70
|
+
build_unknown_stream_usage(**context)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def stream_capture_context(events:, request:, request_url:)
|
|
74
|
+
model = find_event_value(events) do |data|
|
|
75
|
+
data["model"] || data.dig("response", "model") || data.dig("chunk", "model")
|
|
76
|
+
end || request["model"]
|
|
77
|
+
{
|
|
78
|
+
provider: provider_for(request_url),
|
|
79
|
+
model: model,
|
|
80
|
+
provider_response_id: find_event_value(events) do |data|
|
|
81
|
+
data["id"] || data.dig("response", "id") || data.dig("chunk", "id")
|
|
82
|
+
end,
|
|
83
|
+
pricing_mode: pricing_mode(
|
|
84
|
+
request_url: request_url,
|
|
85
|
+
model: model,
|
|
86
|
+
service_tier: stream_pricing_mode(events) || request["service_tier"]
|
|
87
|
+
),
|
|
88
|
+
service_line_items: openai_stream_service_line_items(events, request: request, model: model)
|
|
89
|
+
}
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
def build_known_stream_usage(usage:, provider:, model:, provider_response_id:, pricing_mode:, service_line_items:)
|
|
93
|
+
cache_read = cache_read_input_tokens(usage)
|
|
94
|
+
UsageCapture.build(
|
|
95
|
+
provider: provider,
|
|
96
|
+
provider_response_id: provider_response_id,
|
|
97
|
+
pricing_mode: pricing_mode,
|
|
44
98
|
model: model,
|
|
45
|
-
|
|
99
|
+
token_usage: token_usage(usage: usage, cache_read: cache_read, model: model),
|
|
100
|
+
stream: true,
|
|
101
|
+
usage_source: :stream_final,
|
|
102
|
+
service_line_items: service_line_items
|
|
46
103
|
)
|
|
104
|
+
end
|
|
47
105
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
provider_response_id: response_id,
|
|
64
|
-
pricing_mode: pricing_mode
|
|
65
|
-
)
|
|
66
|
-
end
|
|
106
|
+
def warn_missing_stream_usage(request_url:, request:)
|
|
107
|
+
return unless request.is_a?(Hash) && request["stream"]
|
|
108
|
+
return unless openai_chat_completions_url?(request_url)
|
|
109
|
+
return if request.dig("stream_options", "include_usage")
|
|
110
|
+
|
|
111
|
+
Logging.warn(
|
|
112
|
+
"OpenAI-compatible chat-completions stream finished without a final usage chunk. " \
|
|
113
|
+
"Set `stream_options: { include_usage: true }` in your request body so the gem can " \
|
|
114
|
+
"record token counts. This call was stored with usage_source=unknown."
|
|
115
|
+
)
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def openai_chat_completions_url?(request_url)
|
|
119
|
+
uri = parsed_uri(request_url)
|
|
120
|
+
uri && uri.path.to_s.end_with?("/chat/completions")
|
|
67
121
|
end
|
|
68
122
|
|
|
69
123
|
def detect_stream_usage(events)
|
|
70
124
|
find_event_value(events, reverse: true) do |data|
|
|
71
|
-
usage = data["usage"] || data.dig("response", "usage")
|
|
125
|
+
usage = data["usage"] || data.dig("response", "usage") || data.dig("chunk", "usage")
|
|
72
126
|
usage if usage.is_a?(Hash)
|
|
73
127
|
end
|
|
74
128
|
end
|
|
75
129
|
|
|
76
130
|
def stream_pricing_mode(events)
|
|
77
131
|
find_event_value(events, reverse: true) do |data|
|
|
78
|
-
data["service_tier"] || data.dig("response", "service_tier")
|
|
132
|
+
data["service_tier"] || data.dig("response", "service_tier") || data.dig("chunk", "service_tier")
|
|
79
133
|
end
|
|
80
134
|
end
|
|
81
135
|
|
|
82
136
|
def pricing_mode(request_url:, model:, service_tier:)
|
|
83
|
-
|
|
84
|
-
modes << "data_residency" if openai_regional_processing?(request_url: request_url, model: model)
|
|
85
|
-
modes = modes.compact.uniq
|
|
86
|
-
modes.empty? ? nil : modes.join("_")
|
|
137
|
+
OpenaiUsage.combined_pricing_mode(host: parsed_uri(request_url)&.host, model: model, service_tier: service_tier)
|
|
87
138
|
end
|
|
88
139
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
140
|
+
IMAGE_OUTPUT_MODEL_PATTERN = /\Agpt-image-/i
|
|
141
|
+
private_constant :IMAGE_OUTPUT_MODEL_PATTERN
|
|
142
|
+
|
|
143
|
+
def token_usage(usage:, cache_read:, model: nil)
|
|
144
|
+
audio_input = audio_input_tokens(usage)
|
|
145
|
+
audio_output = audio_output_tokens(usage)
|
|
146
|
+
image_input = image_input_tokens(usage)
|
|
147
|
+
image_output_details = image_output_tokens(usage)
|
|
148
|
+
text_output_details = text_output_tokens(usage)
|
|
149
|
+
raw_output = (usage["completion_tokens"] || usage["output_tokens"]).to_i
|
|
150
|
+
image_output, regular_output_remainder = split_stream_image_output(
|
|
151
|
+
raw_output: raw_output, image_output_details: image_output_details,
|
|
152
|
+
text_output_details: text_output_details, audio_output: audio_output,
|
|
153
|
+
default_to_image: model.to_s.match?(IMAGE_OUTPUT_MODEL_PATTERN)
|
|
154
|
+
)
|
|
99
155
|
|
|
100
|
-
def token_usage(usage:, cache_read:)
|
|
101
156
|
TokenUsage.build(
|
|
102
|
-
input_tokens: regular_input_tokens(
|
|
103
|
-
|
|
104
|
-
|
|
157
|
+
input_tokens: regular_input_tokens(
|
|
158
|
+
usage: usage, cache_read: cache_read, audio_input: audio_input, image_input: image_input
|
|
159
|
+
),
|
|
160
|
+
output_tokens: regular_output_remainder,
|
|
161
|
+
total_tokens: usage["total_tokens"],
|
|
105
162
|
cache_read_input_tokens: cache_read,
|
|
163
|
+
audio_input_tokens: audio_input,
|
|
164
|
+
audio_output_tokens: audio_output,
|
|
165
|
+
image_input_tokens: image_input,
|
|
166
|
+
image_output_tokens: image_output,
|
|
106
167
|
hidden_output_tokens: hidden_output_tokens(usage)
|
|
107
168
|
)
|
|
108
169
|
end
|
|
109
170
|
|
|
110
|
-
def
|
|
111
|
-
|
|
171
|
+
def split_stream_image_output(raw_output:, image_output_details:, text_output_details:, audio_output:,
|
|
172
|
+
default_to_image: false)
|
|
173
|
+
if image_output_details.zero? && text_output_details.zero?
|
|
174
|
+
remainder = [raw_output - audio_output, 0].max
|
|
175
|
+
return default_to_image ? [remainder, 0] : [0, remainder]
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
text_output = text_output_details
|
|
179
|
+
text_output = [raw_output - image_output_details - audio_output, 0].max if text_output.zero?
|
|
180
|
+
[image_output_details, text_output]
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
def regular_input_tokens(usage:, cache_read:, audio_input:, image_input:)
|
|
184
|
+
raw = (usage["prompt_tokens"] || usage["input_tokens"]).to_i
|
|
185
|
+
[raw - cache_read - audio_input - image_input, 0].max
|
|
112
186
|
end
|
|
113
187
|
|
|
114
188
|
def cache_read_input_tokens(usage)
|
|
115
|
-
details = usage
|
|
116
|
-
details["cached_tokens"]
|
|
189
|
+
details = input_token_details(usage)
|
|
190
|
+
details["cached_tokens"].to_i
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
def audio_input_tokens(usage)
|
|
194
|
+
details = input_token_details(usage)
|
|
195
|
+
details["audio_tokens"].to_i
|
|
117
196
|
end
|
|
118
197
|
|
|
119
198
|
def hidden_output_tokens(usage)
|
|
120
|
-
details = usage
|
|
121
|
-
details["reasoning_tokens"]
|
|
199
|
+
details = output_token_details(usage)
|
|
200
|
+
details["reasoning_tokens"].to_i
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
def audio_output_tokens(usage)
|
|
204
|
+
details = output_token_details(usage)
|
|
205
|
+
details["audio_tokens"].to_i
|
|
122
206
|
end
|
|
123
207
|
|
|
124
|
-
def
|
|
125
|
-
|
|
126
|
-
|
|
208
|
+
def image_input_tokens(usage)
|
|
209
|
+
details = input_token_details(usage)
|
|
210
|
+
details["image_tokens"].to_i
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
def image_output_tokens(usage)
|
|
214
|
+
details = output_token_details(usage)
|
|
215
|
+
details["image_tokens"].to_i
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
def text_output_tokens(usage)
|
|
219
|
+
details = output_token_details(usage)
|
|
220
|
+
details["text_tokens"].to_i
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
def input_token_details(usage)
|
|
224
|
+
usage["prompt_tokens_details"] || usage["input_tokens_details"] || usage["input_token_details"] || {}
|
|
225
|
+
end
|
|
127
226
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
(usage["completion_tokens"] || usage["output_tokens"]).to_i
|
|
227
|
+
def output_token_details(usage)
|
|
228
|
+
usage["completion_tokens_details"] || usage["output_tokens_details"] || usage["output_token_details"] || {}
|
|
131
229
|
end
|
|
132
230
|
end
|
|
133
231
|
end
|
|
@@ -13,7 +13,7 @@ module LlmCostTracker
|
|
|
13
13
|
def find_for_provider(provider)
|
|
14
14
|
provider_name = provider.to_s.downcase
|
|
15
15
|
BUILT_INS.find do |parser|
|
|
16
|
-
|
|
16
|
+
parser.provider_names.include?(provider_name)
|
|
17
17
|
end
|
|
18
18
|
end
|
|
19
19
|
end
|