llm_cost_tracker 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +82 -0
- data/README.md +11 -5
- data/app/assets/llm_cost_tracker/application.css +784 -802
- data/app/controllers/llm_cost_tracker/application_controller.rb +14 -2
- data/app/controllers/llm_cost_tracker/calls_controller.rb +28 -21
- data/app/controllers/llm_cost_tracker/dashboard_controller.rb +1 -4
- data/app/controllers/llm_cost_tracker/models_controller.rb +3 -1
- data/app/controllers/llm_cost_tracker/pricing_controller.rb +16 -0
- data/app/controllers/llm_cost_tracker/tags_controller.rb +3 -1
- data/app/helpers/llm_cost_tracker/application_helper.rb +19 -16
- data/app/helpers/llm_cost_tracker/chart_helper.rb +22 -6
- data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +1 -11
- data/app/helpers/llm_cost_tracker/sortable_table_helper.rb +41 -0
- data/app/helpers/llm_cost_tracker/token_usage_helper.rb +4 -6
- data/app/models/llm_cost_tracker/call.rb +28 -63
- data/app/models/llm_cost_tracker/call_line_item.rb +2 -2
- data/app/models/llm_cost_tracker/call_rollup.rb +38 -0
- data/app/models/llm_cost_tracker/call_tag.rb +0 -2
- data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +2 -0
- data/app/services/llm_cost_tracker/dashboard/data_quality.rb +64 -43
- data/app/services/llm_cost_tracker/dashboard/filter.rb +5 -0
- data/app/services/llm_cost_tracker/dashboard/masking.rb +31 -0
- data/app/services/llm_cost_tracker/dashboard/monthly_budget.rb +63 -0
- data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +5 -71
- data/app/services/llm_cost_tracker/dashboard/pagination.rb +2 -5
- data/app/services/llm_cost_tracker/dashboard/pricing_overview.rb +81 -0
- data/app/services/llm_cost_tracker/dashboard/setup_state.rb +6 -68
- data/app/services/llm_cost_tracker/dashboard/sort.rb +9 -0
- data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +20 -12
- data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +1 -1
- data/app/services/llm_cost_tracker/dashboard/top_models.rb +34 -19
- data/app/views/layouts/llm_cost_tracker/application.html.erb +74 -17
- data/app/views/llm_cost_tracker/calls/index.html.erb +69 -90
- data/app/views/llm_cost_tracker/calls/show.html.erb +132 -125
- data/app/views/llm_cost_tracker/dashboard/index.html.erb +120 -159
- data/app/views/llm_cost_tracker/data_quality/index.html.erb +140 -194
- data/app/views/llm_cost_tracker/errors/database.html.erb +2 -2
- data/app/views/llm_cost_tracker/models/index.html.erb +39 -59
- data/app/views/llm_cost_tracker/pricing/index.html.erb +93 -0
- data/app/views/llm_cost_tracker/shared/_filter_pill_date.html.erb +19 -0
- data/app/views/llm_cost_tracker/shared/_filter_pill_model.html.erb +22 -0
- data/app/views/llm_cost_tracker/shared/_filter_pill_provider.html.erb +22 -0
- data/app/views/llm_cost_tracker/shared/_filter_pill_stream.html.erb +23 -0
- data/app/views/llm_cost_tracker/shared/_spend_chart.html.erb +3 -13
- data/app/views/llm_cost_tracker/shared/_tag_chips.html.erb +1 -1
- data/app/views/llm_cost_tracker/shared/setup_required.html.erb +16 -15
- data/app/views/llm_cost_tracker/tags/index.html.erb +27 -32
- data/app/views/llm_cost_tracker/tags/show.html.erb +85 -104
- data/config/routes.rb +3 -3
- data/lib/llm_cost_tracker/budget.rb +25 -28
- data/lib/llm_cost_tracker/capture/sdk_payload.rb +34 -0
- data/lib/llm_cost_tracker/{parsers → capture}/sse.rb +2 -1
- data/lib/llm_cost_tracker/capture/stream_collector.rb +30 -52
- data/lib/llm_cost_tracker/capture/stream_tracker.rb +18 -33
- data/lib/llm_cost_tracker/capture_verifier.rb +59 -0
- data/lib/llm_cost_tracker/charges/cost.rb +27 -0
- data/lib/llm_cost_tracker/{billing → charges}/cost_status.rb +14 -4
- data/lib/llm_cost_tracker/{billing → charges}/line_item.rb +40 -44
- data/lib/llm_cost_tracker/check.rb +5 -0
- data/lib/llm_cost_tracker/configuration.rb +13 -61
- data/lib/llm_cost_tracker/currency.rb +5 -0
- data/lib/llm_cost_tracker/doctor/ingestion_check.rb +15 -49
- data/lib/llm_cost_tracker/doctor/price_check.rb +1 -1
- data/lib/llm_cost_tracker/doctor/probe.rb +3 -4
- data/lib/llm_cost_tracker/doctor/schema_check.rb +3 -6
- data/lib/llm_cost_tracker/doctor.rb +66 -64
- data/lib/llm_cost_tracker/engine.rb +4 -4
- data/lib/llm_cost_tracker/event.rb +12 -20
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +2 -3
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +5 -2
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +4 -5
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +3 -2
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_rollups_provider.rb.erb +4 -0
- data/lib/llm_cost_tracker/ingestion/batch.rb +39 -8
- data/lib/llm_cost_tracker/ingestion/inbox.rb +8 -9
- data/lib/llm_cost_tracker/ingestion/pool.rb +3 -11
- data/lib/llm_cost_tracker/ingestion/worker.rb +7 -17
- data/lib/llm_cost_tracker/ingestion.rb +24 -36
- data/lib/llm_cost_tracker/integrations/anthropic.rb +94 -116
- data/lib/llm_cost_tracker/integrations/base.rb +39 -57
- data/lib/llm_cost_tracker/integrations/openai/batch_capture.rb +84 -0
- data/lib/llm_cost_tracker/integrations/openai/patches.rb +81 -0
- data/lib/llm_cost_tracker/integrations/openai.rb +72 -332
- data/lib/llm_cost_tracker/integrations/ruby_llm.rb +89 -145
- data/lib/llm_cost_tracker/integrations.rb +32 -25
- data/lib/llm_cost_tracker/ledger/period/totals.rb +27 -42
- data/lib/llm_cost_tracker/ledger/period.rb +5 -10
- data/lib/llm_cost_tracker/ledger/rollups.rb +67 -98
- data/lib/llm_cost_tracker/ledger/schema/adapter.rb +12 -13
- data/lib/llm_cost_tracker/ledger/schema/base.rb +51 -0
- data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +24 -79
- data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +3 -35
- data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +4 -41
- data/lib/llm_cost_tracker/ledger/schema/calls.rb +30 -99
- data/lib/llm_cost_tracker/ledger/schema/ingestion/inbox_entries.rb +26 -0
- data/lib/llm_cost_tracker/ledger/schema/ingestion/leases.rb +17 -0
- data/lib/llm_cost_tracker/ledger/schema.rb +26 -0
- data/lib/llm_cost_tracker/ledger/store.rb +18 -42
- data/lib/llm_cost_tracker/ledger/tags/{sql.rb → breakdown.rb} +1 -1
- data/lib/llm_cost_tracker/ledger/tags/encoding.rb +4 -6
- data/lib/llm_cost_tracker/ledger.rb +14 -11
- data/lib/llm_cost_tracker/logging.rb +4 -21
- data/lib/llm_cost_tracker/middleware/faraday.rb +63 -51
- data/lib/llm_cost_tracker/parsers.rb +140 -29
- data/lib/llm_cost_tracker/prices.json +1707 -1
- data/lib/llm_cost_tracker/pricing/backfill.rb +52 -80
- data/lib/llm_cost_tracker/pricing/calculation.rb +260 -0
- data/lib/llm_cost_tracker/pricing/effective_prices.rb +17 -18
- data/lib/llm_cost_tracker/pricing/estimator.rb +2 -2
- data/lib/llm_cost_tracker/pricing/matcher.rb +84 -0
- data/lib/llm_cost_tracker/pricing/mode.rb +53 -35
- data/lib/llm_cost_tracker/pricing/price_key.rb +56 -0
- data/lib/llm_cost_tracker/pricing/rate.rb +18 -0
- data/lib/llm_cost_tracker/pricing/registry.rb +189 -100
- data/lib/llm_cost_tracker/pricing/service_rates.rb +69 -0
- data/lib/llm_cost_tracker/pricing/source.rb +7 -0
- data/lib/llm_cost_tracker/pricing/sync/fetcher.rb +2 -3
- data/lib/llm_cost_tracker/pricing/sync/registry_diff.rb +4 -10
- data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +10 -3
- data/lib/llm_cost_tracker/pricing/sync.rb +9 -11
- data/lib/llm_cost_tracker/pricing/unknown.rb +1 -5
- data/lib/llm_cost_tracker/pricing.rb +10 -295
- data/lib/llm_cost_tracker/providers/anthropic/parser.rb +93 -0
- data/lib/llm_cost_tracker/providers/anthropic/response_parser.rb +30 -0
- data/lib/llm_cost_tracker/providers/anthropic/usage_extractor.rb +76 -0
- data/lib/llm_cost_tracker/providers/azure/hosts.rb +1 -4
- data/lib/llm_cost_tracker/providers/azure/parser.rb +44 -0
- data/lib/llm_cost_tracker/providers/gemini/model_families.rb +1 -4
- data/lib/llm_cost_tracker/providers/gemini/parser.rb +177 -0
- data/lib/llm_cost_tracker/providers/gemini/usage_extractor.rb +76 -0
- data/lib/llm_cost_tracker/providers/openai/hosts.rb +1 -7
- data/lib/llm_cost_tracker/providers/openai/model_families.rb +5 -8
- data/lib/llm_cost_tracker/providers/openai/parser.rb +39 -0
- data/lib/llm_cost_tracker/providers/openai/response_parser.rb +152 -0
- data/lib/llm_cost_tracker/providers/openai/service_charges.rb +181 -0
- data/lib/llm_cost_tracker/providers/openai/usage_extractor.rb +72 -0
- data/lib/llm_cost_tracker/providers/openai_compatible/parser.rb +36 -0
- data/lib/llm_cost_tracker/providers.rb +35 -0
- data/lib/llm_cost_tracker/railtie.rb +0 -7
- data/lib/llm_cost_tracker/report/data.rb +3 -4
- data/lib/llm_cost_tracker/report/formatter.rb +33 -20
- data/lib/llm_cost_tracker/report.rb +1 -1
- data/lib/llm_cost_tracker/retention.rb +6 -19
- data/lib/llm_cost_tracker/tags/context.rb +9 -6
- data/lib/llm_cost_tracker/tags/sanitizer.rb +10 -0
- data/lib/llm_cost_tracker/timing.rb +2 -4
- data/lib/llm_cost_tracker/tracker.rb +24 -36
- data/lib/llm_cost_tracker/usage/catalog.rb +58 -0
- data/lib/llm_cost_tracker/usage/dimension.rb +21 -0
- data/lib/llm_cost_tracker/{billing/components.yml → usage/dimensions.yml} +24 -46
- data/lib/llm_cost_tracker/usage/source.rb +14 -0
- data/lib/llm_cost_tracker/usage/token_usage.rb +100 -0
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +43 -52
- data/lib/tasks/llm_cost_tracker.rake +14 -73
- metadata +92 -58
- data/app/controllers/llm_cost_tracker/reconciliation_controller.rb +0 -106
- data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +0 -28
- data/app/helpers/llm_cost_tracker/reconciliation_helper.rb +0 -13
- data/app/models/llm_cost_tracker/provider_invoice.rb +0 -13
- data/app/models/llm_cost_tracker/provider_invoice_import.rb +0 -29
- data/app/views/llm_cost_tracker/reconciliation/index.html.erb +0 -183
- data/app/views/llm_cost_tracker/shared/_active_filters.html.erb +0 -16
- data/app/views/llm_cost_tracker/shared/_filters.html.erb +0 -66
- data/app/views/llm_cost_tracker/shared/_sort.html.erb +0 -13
- data/lib/llm_cost_tracker/billing/components.rb +0 -95
- data/lib/llm_cost_tracker/capture/stream.rb +0 -9
- data/lib/llm_cost_tracker/doctor/capture_verifier.rb +0 -61
- data/lib/llm_cost_tracker/doctor/check.rb +0 -7
- data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +0 -56
- data/lib/llm_cost_tracker/doctor/invoice_reconciliation_check.rb +0 -164
- data/lib/llm_cost_tracker/doctor/legacy_audit_check.rb +0 -34
- data/lib/llm_cost_tracker/doctor/legacy_billing_status_check.rb +0 -20
- data/lib/llm_cost_tracker/doctor/pricing_snapshot_drift_check.rb +0 -85
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/reconciliation_generator.rb +0 -34
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_reconciliation.rb.erb +0 -60
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_provider_invoice_imports_provider.rb.erb +0 -32
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_provider_invoices_metadata_index.rb.erb +0 -25
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_provider_invoice_imports_provider_generator.rb +0 -31
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_provider_invoices_metadata_index_generator.rb +0 -31
- data/lib/llm_cost_tracker/ledger/rollups/upsert_sql.rb +0 -40
- data/lib/llm_cost_tracker/ledger/schema/ingestion_inbox_entries.rb +0 -57
- data/lib/llm_cost_tracker/ledger/schema/ingestion_leases.rb +0 -52
- data/lib/llm_cost_tracker/ledger/schema/provider_invoice_imports.rb +0 -56
- data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +0 -72
- data/lib/llm_cost_tracker/masking.rb +0 -39
- data/lib/llm_cost_tracker/parsers/anthropic.rb +0 -193
- data/lib/llm_cost_tracker/parsers/azure.rb +0 -46
- data/lib/llm_cost_tracker/parsers/base.rb +0 -131
- data/lib/llm_cost_tracker/parsers/gemini.rb +0 -232
- data/lib/llm_cost_tracker/parsers/openai.rb +0 -41
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +0 -51
- data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +0 -155
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +0 -228
- data/lib/llm_cost_tracker/pricing/explainer.rb +0 -74
- data/lib/llm_cost_tracker/pricing/lookup.rb +0 -236
- data/lib/llm_cost_tracker/pricing/service_charges.rb +0 -206
- data/lib/llm_cost_tracker/providers/anthropic/tier_classification.rb +0 -22
- data/lib/llm_cost_tracker/reconcile_tasks.rb +0 -134
- data/lib/llm_cost_tracker/reconciliation/diff.rb +0 -409
- data/lib/llm_cost_tracker/reconciliation/diff_result.rb +0 -44
- data/lib/llm_cost_tracker/reconciliation/import_result.rb +0 -19
- data/lib/llm_cost_tracker/reconciliation/importer.rb +0 -254
- data/lib/llm_cost_tracker/reconciliation/sources/anthropic_usage.rb +0 -172
- data/lib/llm_cost_tracker/reconciliation/sources/fingerprint.rb +0 -20
- data/lib/llm_cost_tracker/reconciliation/sources/openai_usage.rb +0 -142
- data/lib/llm_cost_tracker/reconciliation.rb +0 -118
- data/lib/llm_cost_tracker/token_usage.rb +0 -93
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  module Providers
    module Azure
      # Parser for Azure OpenAI requests.
      #
      # Response handling is mixed in from Openai::ResponseParser; this class
      # contributes URL matching, the provider label, and a fallback that reads
      # the model from the deployment segment of the request path.
      class Parser < LlmCostTracker::Parsers::Base
        include Openai::ResponseParser

        # Endpoint suffixes that may follow the deployment or /v1 prefix.
        TRACKED_ENDPOINTS = %w[
          chat/completions
          completions
          embeddings
          moderations
          responses
          audio/transcriptions
          audio/translations
          audio/speech
          images/generations
          images/edits
          images/variations
        ].freeze

        # Whole-path matcher: /openai/deployments/<name>/<endpoint> or /openai/v1/<endpoint>.
        PATH_PATTERN = %r{\A/openai/(?:deployments/[^/]+|v1)/(?:#{TRACKED_ENDPOINTS.join('|')})\z}

        # Whether +url+ should be handled by this parser: the host must satisfy
        # Hosts.openai? and the path must match PATH_PATTERN.
        # NOTE(review): uri_matches? is inherited; presumably it parses +url+
        # and yields the URI — confirm against Parsers::Base.
        def self.match?(url)
          uri_matches?(url) { |uri| Hosts.openai?(uri.host) && uri.path.to_s.match?(PATH_PATTERN) }
        end

        # Provider labels this parser can emit.
        def self.provider_names
          %w[azure_openai]
        end

        # Every matched request is attributed to the same provider label.
        def provider_for(_request_url)
          "azure_openai"
        end

        # Prefers whatever the superclass resolves (bound to the same
        # arguments); otherwise falls back to the deployment name embedded in
        # the URL path. Returns nil when neither is available.
        def model_for(request_url, request_parsed)
          from_body = super
          return from_body if from_body

          deployment = parsed_uri(request_url)&.path&.match(%r{/openai/deployments/([^/]+)/})
          deployment ? deployment[1] : nil
        end
      end
    end
  end
end
|
|
@@ -5,10 +5,7 @@ module LlmCostTracker
|
|
|
5
5
|
module Gemini
|
|
6
6
|
module ModelFamilies
|
|
7
7
|
PER_QUERY_GROUNDING_MODEL_PATTERN = /\bgemini-(?:[3-9]|[1-9]\d)\b/i
|
|
8
|
-
|
|
9
|
-
module_function
|
|
10
|
-
|
|
11
|
-
def per_query_grounding?(model)
|
|
8
|
+
def self.per_query_grounding?(model)
|
|
12
9
|
model.to_s.match?(PER_QUERY_GROUNDING_MODEL_PATTERN)
|
|
13
10
|
end
|
|
14
11
|
end
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  module Providers
    module Gemini
      # Turns Gemini `generateContent` / `streamGenerateContent` traffic into
      # cost-tracking events, including a grounding (web search) line item when
      # the response reports search queries.
      #
      # NOTE(review): match_uri?, safe_json_parse, parsed_uri, find_event_value
      # and build_unknown_stream_usage are inherited helpers not visible here;
      # comments below describe only how this class uses them.
      class Parser < LlmCostTracker::Parsers::Base
        HOSTS = %w[generativelanguage.googleapis.com].freeze
        TRACKED_PATH_PATTERN = %r{/models/[^/:]+:(?:generateContent|streamGenerateContent)\z}
        STREAM_PATH_PATTERN = /:streamGenerateContent\z/

        class << self
          # Whether +url+ is a tracked Gemini endpoint on one of HOSTS.
          def match?(url)
            match_uri?(url, hosts: HOSTS, path_pattern: TRACKED_PATH_PATTERN)
          end

          # Provider labels this parser can emit.
          def provider_names
            %w[gemini]
          end
        end

        # A request is streaming when its path ends in :streamGenerateContent;
        # otherwise the decision is left to the superclass.
        def streaming_request?(request_url, request_parsed)
          return true if match_uri?(request_url, path_pattern: STREAM_PATH_PATTERN)

          super
        end

        # Builds an event from a non-streaming response.
        #
        # Returns nil unless the status is 200 and the body carries a
        # `usageMetadata` object.
        def parse(request_url:, request_body:, response_status:, response_body:, response_headers: nil)
          return nil unless response_status == 200

          response = safe_json_parse(response_body)
          usage = response["usageMetadata"]
          # Require a Hash, matching what merged_stream_usage enforces for
          # streams, so a malformed payload is skipped instead of raising later.
          return nil unless usage.is_a?(Hash)

          request = safe_json_parse(request_body)
          model = extract_model_from_url(request_url)
          query_count = grounding_request_count(response["candidates"])
          build_event(
            request_url: request_url,
            usage: usage,
            usage_source: Usage::Source::RESPONSE,
            provider_response_id: response["responseId"],
            pricing_mode: pricing_mode(request: request, usage: usage, response_headers: response_headers),
            service_line_items: grounding_line_items(query_count, model: model)
          )
        end

        # Builds an event from collected stream events.
        #
        # Returns nil unless the status is 200. When no event carries usage
        # metadata the call is handed to build_unknown_stream_usage instead.
        def parse_stream(response_status:, request_url: nil, request_body: nil, events: [], response_headers: nil)
          return nil unless response_status == 200

          request = safe_json_parse(request_body)
          usage = merged_stream_usage(events)
          model = extract_model_from_url(request_url)
          response_id = stream_response_id(events)
          mode = pricing_mode(request: request, usage: usage, response_headers: response_headers)
          service_line_items = grounding_line_items_for_stream(events, model: model)

          if usage
            build_event(
              request_url: request_url,
              usage: usage,
              stream: true,
              usage_source: Usage::Source::STREAM_FINAL,
              provider_response_id: response_id,
              pricing_mode: mode,
              service_line_items: service_line_items
            )
          else
            build_unknown_stream_usage(
              provider: "gemini",
              model: model,
              provider_response_id: response_id,
              pricing_mode: mode,
              service_line_items: service_line_items
            )
          end
        end

        # The model always comes from the URL path; the request body is ignored.
        def model_for(request_url, _request_parsed)
          extract_model_from_url(request_url)
        end

        # Every matched request is attributed to the same provider label.
        def provider_for(_request_url)
          "gemini"
        end

        private

        # Assembles the Event for a call whose usage metadata is known.
        def build_event(request_url:,
                        usage:,
                        usage_source:,
                        stream: false,
                        provider_response_id: nil,
                        pricing_mode: nil,
                        service_line_items: nil)
          Event.build(
            provider: "gemini",
            model: extract_model_from_url(request_url),
            pricing_mode: pricing_mode,
            token_usage: UsageExtractor.token_usage(usage),
            stream: stream,
            usage_source: usage_source,
            provider_response_id: provider_response_id,
            service_line_items: service_line_items
          )
        end

        # Usage metadata for a stream: searched with reverse: true and only
        # Hash values are accepted.
        def merged_stream_usage(events)
          find_event_value(events, reverse: true) do |data|
            meta = data["usageMetadata"]
            meta if meta.is_a?(Hash)
          end
        end

        # Response id reported by the stream events, if any.
        def stream_response_id(events)
          find_event_value(events) { |data| data["responseId"] }
        end

        # Extracts "<model>" from a ".../models/<model>:<action>" path.
        # Returns nil when the URL cannot be parsed or has no models segment.
        def extract_model_from_url(url)
          uri = parsed_uri(url)
          return nil unless uri

          match = uri.path.match(%r{/models/([^/:]+)})
          match && match[1]
        end

        # Resolves the pricing mode in priority order: the tier in the usage
        # metadata, then the x-gemini-service-tier response header, then the
        # requested tier — the latter honoured only when it normalizes to "flex".
        def pricing_mode(request:, usage:, response_headers:)
          body_mode = Pricing::Mode.normalize(usage && usage["serviceTier"])
          return body_mode if body_mode

          header_mode = Pricing::Mode.normalize(response_header(response_headers, "x-gemini-service-tier"))
          return header_mode if header_mode

          request_mode = Pricing::Mode.normalize(request["service_tier"] || request["serviceTier"])
          request_mode == "flex" ? request_mode : nil
        end

        # Case-insensitive header lookup; +name+ must already be lowercase.
        def response_header(headers, name)
          headers.to_h.find { |key, _value| key.to_s.downcase == name }&.last
        end

        # Grounding line items for a stream, based on an event (searched with
        # reverse: true) that reports a positive query count.
        def grounding_line_items_for_stream(events, model:)
          quantity = find_event_value(events, reverse: true) do |data|
            count = grounding_request_count(data["candidates"])
            count if count.positive?
          end
          grounding_line_items(quantity || 0, model: model)
        end

        # Total number of web search queries across all candidates.
        # Candidates or grounding metadata that are not Hashes count as zero
        # rather than raising on a malformed provider payload.
        def grounding_request_count(candidates)
          Array(candidates).sum do |candidate|
            next 0 unless candidate.is_a?(Hash)

            metadata = candidate["groundingMetadata"]
            metadata.is_a?(Hash) ? Array(metadata["webSearchQueries"]).size : 0
          end
        end

        # Builds the single grounding line item, or [] when nothing was searched.
        # The raw query count is kept in +details+ even when the billed
        # quantity differs.
        def grounding_line_items(query_count, model:)
          return [] unless query_count.positive?

          billed_quantity = grounding_billed_quantity(query_count, model: model)
          [
            Charges::LineItem.build(
              dimension_key: "grounding_request",
              quantity: billed_quantity,
              cost_status: Charges::CostStatus::UNKNOWN,
              pricing_basis: "provider_usage",
              provider_field: "response.candidates.groundingMetadata.webSearchQueries",
              details: { web_search_queries: query_count }
            )
          ]
        end

        # Models flagged by ModelFamilies.per_query_grounding? are billed per
        # query; all others are billed as a single grounded request.
        def grounding_billed_quantity(query_count, model:)
          ModelFamilies.per_query_grounding?(model) ? query_count : 1
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  module Providers
    module Gemini
      # Converts a Gemini `usageMetadata` hash (string keys) into the keyword
      # arguments passed to Usage::TokenUsage.build, splitting prompt and
      # candidate tokens by modality.
      module UsageExtractor
        # Builds the token usage for one call.
        #
        # usage - Hash with string keys such as "promptTokenCount",
        #         "cachedContentTokenCount", "promptTokensDetails".
        #
        # Returns whatever Usage::TokenUsage.build returns.
        def self.token_usage(usage)
          cache_read = usage["cachedContentTokenCount"].to_i
          tool_use_prompt = usage["toolUsePromptTokenCount"].to_i
          audio_input = audio_input_tokens(usage)
          audio_output = audio_output_tokens(usage)
          image_input = image_input_tokens(usage)
          image_output = image_output_tokens(usage)

          plain_input = regular_input_tokens(usage: usage,
                                             cache_read: cache_read,
                                             audio_input: audio_input,
                                             image_input: image_input)
          plain_output = regular_output_tokens(usage: usage,
                                               audio_output: audio_output,
                                               image_output: image_output)

          Usage::TokenUsage.build(
            # Tool-use prompt tokens are added on top of the regular input.
            input_tokens: plain_input + tool_use_prompt,
            output_tokens: plain_output,
            total_tokens: usage["totalTokenCount"],
            cache_read_input_tokens: cache_read,
            audio_input_tokens: audio_input,
            audio_output_tokens: audio_output,
            image_input_tokens: image_input,
            image_output_tokens: image_output,
            hidden_output_tokens: usage["thoughtsTokenCount"]
          )
        end

        # Candidate tokens plus thought tokens.
        def self.gross_output_tokens(usage)
          usage["candidatesTokenCount"].to_i + usage["thoughtsTokenCount"].to_i
        end

        # Prompt tokens left after removing cached, audio and image tokens
        # (never negative).
        def self.regular_input_tokens(usage:, cache_read:, audio_input:, image_input:)
          [usage["promptTokenCount"].to_i - cache_read - audio_input - image_input, 0].max
        end

        # Gross output tokens left after removing audio and image tokens
        # (never negative).
        def self.regular_output_tokens(usage:, audio_output:, image_output:)
          [gross_output_tokens(usage) - audio_output - image_output, 0].max
        end

        # Audio prompt tokens minus the audio tokens listed in the cache details.
        def self.audio_input_tokens(usage)
          prompt_audio = modality_tokens(usage["promptTokensDetails"], "AUDIO")
          cache_audio = modality_tokens(usage["cacheTokensDetails"], "AUDIO")
          [prompt_audio - cache_audio, 0].max
        end

        # Audio tokens among the candidates.
        def self.audio_output_tokens(usage)
          modality_tokens(usage["candidatesTokensDetails"], "AUDIO")
        end

        # Image prompt tokens minus the image tokens listed in the cache details.
        def self.image_input_tokens(usage)
          prompt_image = modality_tokens(usage["promptTokensDetails"], "IMAGE")
          cache_image = modality_tokens(usage["cacheTokensDetails"], "IMAGE")
          [prompt_image - cache_image, 0].max
        end

        # Image tokens among the candidates.
        def self.image_output_tokens(usage)
          modality_tokens(usage["candidatesTokensDetails"], "IMAGE")
        end

        # Sums "tokenCount" over the entries of +details+ whose "modality"
        # equals +modality+. A nil +details+ yields 0. Entries that are not
        # Hashes (nil, strings, stray arrays) are skipped so a malformed
        # provider payload cannot raise here.
        def self.modality_tokens(details, modality)
          Array(details).sum do |detail|
            next 0 unless detail.is_a?(Hash) && detail["modality"] == modality

            detail["tokenCount"].to_i
          end
        end
      end
    end
  end
end
|
|
@@ -20,13 +20,7 @@ module LlmCostTracker
|
|
|
20
20
|
|
|
21
21
|
DATA_RESIDENCY_HOST_PATTERN = /\A[a-z]{2,3}\.api\.openai\.com\z/
|
|
22
22
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def api?(host)
|
|
26
|
-
API_HOSTS.include?(host.to_s.downcase)
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
def data_residency?(host)
|
|
23
|
+
def self.data_residency?(host)
|
|
30
24
|
host.to_s.downcase.match?(DATA_RESIDENCY_HOST_PATTERN)
|
|
31
25
|
end
|
|
32
26
|
end
|
|
@@ -19,26 +19,23 @@ module LlmCostTracker
|
|
|
19
19
|
NON_REASONING_GPT5_PATTERN = /\Agpt-5(?:\.\d+)?-chat\b/i
|
|
20
20
|
|
|
21
21
|
CHAT_COMPLETIONS_SEARCH_MODEL_PATTERN = /-search-(?:preview|api)\b/i
|
|
22
|
-
|
|
23
|
-
module_function
|
|
24
|
-
|
|
25
|
-
def data_residency?(model)
|
|
22
|
+
def self.data_residency?(model)
|
|
26
23
|
model.to_s.match?(DATA_RESIDENCY_MODEL_PATTERN)
|
|
27
24
|
end
|
|
28
25
|
|
|
29
|
-
def image_output?(model)
|
|
26
|
+
def self.image_output?(model)
|
|
30
27
|
model.to_s.match?(IMAGE_OUTPUT_MODEL_PATTERN)
|
|
31
28
|
end
|
|
32
29
|
|
|
33
|
-
def character_billed_tts?(model)
|
|
30
|
+
def self.character_billed_tts?(model)
|
|
34
31
|
model.to_s.match?(CHARACTER_BILLED_TTS_MODEL_PATTERN)
|
|
35
32
|
end
|
|
36
33
|
|
|
37
|
-
def chat_completions_search?(model)
|
|
34
|
+
def self.chat_completions_search?(model)
|
|
38
35
|
model.to_s.match?(CHAT_COMPLETIONS_SEARCH_MODEL_PATTERN)
|
|
39
36
|
end
|
|
40
37
|
|
|
41
|
-
def reasoning?(model)
|
|
38
|
+
def self.reasoning?(model)
|
|
42
39
|
name = model.to_s
|
|
43
40
|
return false if name.empty?
|
|
44
41
|
return false if NON_REASONING_GPT5_PATTERN.match?(name)
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  module Providers
    module Openai
      # Parser for requests sent to the OpenAI API hosts.
      #
      # Response and stream handling come from the ResponseParser mixin; this
      # class only decides which URLs are tracked and which provider label the
      # resulting events carry.
      class Parser < LlmCostTracker::Parsers::Base
        include ResponseParser

        # Exact request paths that are tracked.
        TRACKED_PATHS = %w[
          /v1/chat/completions
          /v1/completions
          /v1/embeddings
          /v1/responses
          /v1/images/generations
          /v1/images/edits
          /v1/images/variations
          /v1/audio/transcriptions
          /v1/audio/translations
          /v1/audio/speech
          /v1/moderations
        ].freeze

        # Whether +url+ points at one of TRACKED_PATHS on one of Hosts::API_HOSTS.
        # NOTE(review): match_uri? is inherited; its exact matching rules are
        # not visible here — confirm against Parsers::Base.
        def self.match?(url)
          match_uri?(url, hosts: Hosts::API_HOSTS, exact_paths: TRACKED_PATHS)
        end

        # Provider labels this parser can emit.
        def self.provider_names
          %w[openai]
        end

        # Every matched request is attributed to the same provider label.
        def provider_for(_request_url)
          "openai"
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "active_support/core_ext/hash/keys"
|
|
4
|
+
|
|
5
|
+
require_relative "hosts"
|
|
6
|
+
require_relative "model_families"
|
|
7
|
+
require_relative "service_charges"
|
|
8
|
+
require_relative "usage_extractor"
|
|
9
|
+
|
|
10
|
+
module LlmCostTracker
  module Providers
    module Openai
      # Shared response handling for OpenAI-shaped APIs. Parsers include this
      # module to get #parse / #parse_stream; the module-level methods can be
      # called directly (as ResponseParser.event_from_response) with an
      # already-decoded response.
      #
      # NOTE(review): safe_json_parse, parsed_uri, provider_for,
      # find_event_value, build_unknown_stream_usage and
      # openai_stream_service_line_items are expected from the including class
      # or the ServiceCharges mixin; they are not defined in this module.
      module ResponseParser
        include LlmCostTracker::Providers::Openai::ServiceCharges

        class << self
          # Combines the normalized service tier with "data_residency" when
          # both the host and the model qualify for it.
          def combined_pricing_mode(host:, model:, service_tier:)
            modes = [Pricing::Mode.normalize(service_tier)]
            modes << "data_residency" if Hosts.data_residency?(host) && ModelFamilies.data_residency?(model)
            Pricing::Mode.compose(modes)
          end

          # Builds an Event from a decoded (non-streaming) response.
          #
          # response/request - decoded JSON bodies with string keys.
          # pricing_mode     - optional override; when nil it is derived from
          #                    the host, model and service tier.
          #
          # Returns nil when the response has no usage object.
          def event_from_response(response:, request:, provider:, host:, usage_source:, pricing_mode: nil)
            raw_usage = response["usage"]
            # Only a Hash can be symbolized; detect_stream_usage applies the
            # same requirement on the streaming path.
            return nil unless raw_usage.is_a?(Hash)

            usage = raw_usage.deep_symbolize_keys
            model = response["model"] || request["model"]
            # Pass the resolved model (response, else request) so service
            # charges see the same model as the event itself, as the
            # streaming path already does.
            service_line_items =
              ServiceCharges.service_line_items_for(response, request: request, model: model) +
              ServiceCharges.transcription_line_items(usage)
            Event.build(
              provider: provider,
              provider_response_id: response["id"],
              pricing_mode: pricing_mode || combined_pricing_mode(
                host: host, model: model, service_tier: response["service_tier"] || request["service_tier"]
              ),
              model: model,
              token_usage: UsageExtractor.token_usage(usage, model: model),
              usage_source: usage_source,
              service_line_items: service_line_items
            )
          end
        end

        # Parses a complete (non-streaming) exchange. Returns nil unless the
        # status is 200 or when the response carries no usage.
        def parse(request_url:, request_body:, response_status:, response_body:, **)
          return nil unless response_status == 200

          ResponseParser.event_from_response(
            response: safe_json_parse(response_body),
            request: safe_json_parse(request_body),
            provider: provider_for(request_url),
            host: parsed_uri(request_url)&.host,
            usage_source: Usage::Source::RESPONSE
          )
        end

        # Parses collected stream events. Returns nil unless the status is 200.
        # When no event carries usage, a warning may be logged and the call is
        # handed to build_unknown_stream_usage.
        def parse_stream(response_status:, request_url: nil, request_body: nil, events: [], **)
          return nil unless response_status == 200

          request = safe_json_parse(request_body)
          usage = detect_stream_usage(events)
          context = stream_capture_context(events: events, request: request, request_url: request_url)

          return build_known_stream_usage(usage: usage, **context) if usage

          warn_missing_stream_usage(request_url: request_url, request: request)
          build_unknown_stream_usage(**context)
        end

        # True for chat-completions URLs (see openai_chat_completions_url?).
        def auto_enable_stream_usage?(request_url)
          openai_chat_completions_url?(request_url)
        end

        private

        # Collects the attributes shared by the known- and unknown-usage
        # stream results. Model and id are looked up at the top level of an
        # event, then under "response", then under "chunk"; the model falls
        # back to the request body.
        def stream_capture_context(events:, request:, request_url:)
          model = find_event_value(events) do |data|
            data["model"] || data.dig("response", "model") || data.dig("chunk", "model")
          end || request["model"]
          {
            provider: provider_for(request_url),
            model: model,
            provider_response_id: find_event_value(events) do |data|
              data["id"] || data.dig("response", "id") || data.dig("chunk", "id")
            end,
            pricing_mode: pricing_mode(
              request_url: request_url,
              model: model,
              service_tier: stream_pricing_mode(events) || request["service_tier"]
            ),
            service_line_items: openai_stream_service_line_items(events, request: request, model: model)
          }
        end

        # Builds the Event for a stream whose usage was reported.
        def build_known_stream_usage(usage:,
                                     provider:,
                                     model:,
                                     provider_response_id:,
                                     pricing_mode:,
                                     service_line_items:)
          Event.build(
            provider: provider,
            provider_response_id: provider_response_id,
            pricing_mode: pricing_mode,
            model: model,
            token_usage: UsageExtractor.token_usage(usage, model: model),
            stream: true,
            usage_source: Usage::Source::STREAM_FINAL,
            service_line_items: service_line_items
          )
        end

        # Logs a hint when a streamed chat-completions request did not ask for
        # usage (no stream_options.include_usage) and none was received.
        def warn_missing_stream_usage(request_url:, request:)
          return unless request["stream"]
          return unless openai_chat_completions_url?(request_url)
          return if request.dig("stream_options", "include_usage")

          Logging.warn(
            "OpenAI-compatible chat-completions stream finished without a final usage chunk. " \
            "Set `stream_options: { include_usage: true }` in your request body so the gem can " \
            "record token counts. This call was stored with usage_source=#{Usage::Source::UNKNOWN}."
          )
        end

        # Whether the URL path ends in "/chat/completions". Returns a falsy
        # value when the URL cannot be parsed.
        def openai_chat_completions_url?(request_url)
          uri = parsed_uri(request_url)
          uri && uri.path.to_s.end_with?("/chat/completions")
        end

        # Usage hash from the stream events (searched with reverse: true),
        # with symbolized keys; nil when no event carries a Hash usage.
        def detect_stream_usage(events)
          usage = find_event_value(events, reverse: true) do |data|
            candidate = data["usage"] || data.dig("response", "usage") || data.dig("chunk", "usage")
            candidate if candidate.is_a?(Hash)
          end
          usage&.deep_symbolize_keys
        end

        # Service tier reported by the stream events (searched with reverse: true).
        def stream_pricing_mode(events)
          find_event_value(events, reverse: true) do |data|
            data["service_tier"] || data.dig("response", "service_tier") || data.dig("chunk", "service_tier")
          end
        end

        # Instance-level wrapper around ResponseParser.combined_pricing_mode
        # that derives the host from the request URL.
        def pricing_mode(request_url:, model:, service_tier:)
          ResponseParser.combined_pricing_mode(host: parsed_uri(request_url)&.host,
                                               model: model,
                                               service_tier: service_tier)
        end
      end
    end
  end
end
|