llm_cost_tracker 0.7.3 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -0
- data/CHANGELOG.md +173 -0
- data/README.md +60 -220
- data/app/assets/llm_cost_tracker/application.css +282 -45
- data/app/controllers/llm_cost_tracker/application_controller.rb +25 -20
- data/app/controllers/llm_cost_tracker/assets_controller.rb +11 -1
- data/app/controllers/llm_cost_tracker/calls_controller.rb +22 -19
- data/app/controllers/llm_cost_tracker/data_quality_controller.rb +14 -2
- data/app/controllers/llm_cost_tracker/reconciliation_controller.rb +106 -0
- data/app/controllers/llm_cost_tracker/tags_controller.rb +15 -1
- data/app/helpers/llm_cost_tracker/application_helper.rb +18 -21
- data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +3 -21
- data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +4 -4
- data/app/helpers/llm_cost_tracker/dashboard_query_helper.rb +1 -1
- data/app/helpers/llm_cost_tracker/inline_style_helper.rb +28 -0
- data/app/helpers/llm_cost_tracker/reconciliation_helper.rb +13 -0
- data/app/helpers/llm_cost_tracker/token_usage_helper.rb +24 -7
- data/app/models/llm_cost_tracker/call.rb +166 -0
- data/app/models/llm_cost_tracker/call_line_item.rb +18 -0
- data/app/models/llm_cost_tracker/call_rollup.rb +6 -0
- data/app/models/llm_cost_tracker/call_tag.rb +12 -0
- data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +9 -0
- data/app/models/llm_cost_tracker/ingestion/lease.rb +0 -3
- data/app/models/llm_cost_tracker/provider_invoice.rb +13 -0
- data/app/models/llm_cost_tracker/provider_invoice_import.rb +24 -0
- data/app/services/llm_cost_tracker/dashboard/data_quality.rb +152 -32
- data/app/services/llm_cost_tracker/dashboard/date_range.rb +1 -1
- data/app/services/llm_cost_tracker/dashboard/filter.rb +8 -6
- data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +74 -21
- data/app/services/llm_cost_tracker/dashboard/pagination.rb +6 -4
- data/app/services/llm_cost_tracker/dashboard/params.rb +8 -2
- data/app/services/llm_cost_tracker/dashboard/provider_breakdown.rb +1 -1
- data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +4 -3
- data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +42 -9
- data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +14 -37
- data/app/services/llm_cost_tracker/dashboard/time_series.rb +1 -1
- data/app/services/llm_cost_tracker/dashboard/top_models.rb +1 -1
- data/app/views/layouts/llm_cost_tracker/application.html.erb +6 -1
- data/app/views/llm_cost_tracker/calls/index.html.erb +33 -75
- data/app/views/llm_cost_tracker/calls/show.html.erb +73 -33
- data/app/views/llm_cost_tracker/dashboard/index.html.erb +16 -57
- data/app/views/llm_cost_tracker/data_quality/index.html.erb +183 -167
- data/app/views/llm_cost_tracker/errors/database.html.erb +1 -1
- data/app/views/llm_cost_tracker/models/index.html.erb +18 -50
- data/app/views/llm_cost_tracker/reconciliation/index.html.erb +183 -0
- data/app/views/llm_cost_tracker/shared/_bar.html.erb +1 -1
- data/app/views/llm_cost_tracker/shared/_filters.html.erb +66 -0
- data/app/views/llm_cost_tracker/shared/_metric_stack.html.erb +1 -1
- data/app/views/llm_cost_tracker/shared/_sort.html.erb +13 -0
- data/app/views/llm_cost_tracker/shared/setup_required.html.erb +1 -1
- data/app/views/llm_cost_tracker/tags/index.html.erb +3 -34
- data/app/views/llm_cost_tracker/tags/show.html.erb +64 -36
- data/config/routes.rb +3 -2
- data/lib/llm_cost_tracker/billing/components.rb +95 -0
- data/lib/llm_cost_tracker/billing/components.yml +188 -0
- data/lib/llm_cost_tracker/billing/cost_status.rb +45 -0
- data/lib/llm_cost_tracker/billing/line_item.rb +189 -0
- data/lib/llm_cost_tracker/budget.rb +26 -36
- data/lib/llm_cost_tracker/capture/stream_collector.rb +125 -38
- data/lib/llm_cost_tracker/capture/stream_tracker.rb +40 -5
- data/lib/llm_cost_tracker/configuration.rb +86 -17
- data/lib/llm_cost_tracker/dashboard_setup_state.rb +109 -0
- data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +56 -0
- data/lib/llm_cost_tracker/doctor/ingestion_check.rb +48 -30
- data/lib/llm_cost_tracker/doctor/invoice_reconciliation_check.rb +164 -0
- data/lib/llm_cost_tracker/doctor/legacy_audit_check.rb +36 -0
- data/lib/llm_cost_tracker/doctor/legacy_billing_status_check.rb +22 -0
- data/lib/llm_cost_tracker/doctor/price_check.rb +2 -2
- data/lib/llm_cost_tracker/doctor/pricing_snapshot_drift_check.rb +85 -0
- data/lib/llm_cost_tracker/doctor/probe.rb +17 -0
- data/lib/llm_cost_tracker/doctor/schema_check.rb +34 -0
- data/lib/llm_cost_tracker/doctor.rb +111 -44
- data/lib/llm_cost_tracker/engine.rb +9 -0
- data/lib/llm_cost_tracker/errors.rb +5 -19
- data/lib/llm_cost_tracker/event.rb +11 -3
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/call_rollups_generator.rb +43 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/durable_ingestion_generator.rb +43 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +17 -5
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +2 -6
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/reconciliation_generator.rb +34 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_call_rollups.rb.erb +15 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +104 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_durable_ingestion.rb.erb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_reconciliation.rb.erb +55 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +28 -25
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_rollups_provider.rb.erb +20 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_tags_key_value_index.rb.erb +32 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_image_tokens.rb.erb +18 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_rollups_provider_generator.rb +38 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/{add_provider_response_id_generator.rb → upgrade_call_tags_key_value_index_generator.rb} +5 -4
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/{add_streaming_generator.rb → upgrade_image_tokens_generator.rb} +4 -4
- data/lib/llm_cost_tracker/ingestion/batch.rb +11 -12
- data/lib/llm_cost_tracker/ingestion/inbox.rb +39 -24
- data/lib/llm_cost_tracker/ingestion/inline.rb +22 -0
- data/lib/llm_cost_tracker/ingestion/worker.rb +24 -7
- data/lib/llm_cost_tracker/ingestion.rb +66 -22
- data/lib/llm_cost_tracker/integrations/anthropic.rb +68 -42
- data/lib/llm_cost_tracker/integrations/base.rb +56 -32
- data/lib/llm_cost_tracker/integrations/openai.rb +342 -63
- data/lib/llm_cost_tracker/integrations/ruby_llm.rb +110 -11
- data/lib/llm_cost_tracker/integrations.rb +21 -3
- data/lib/llm_cost_tracker/ledger/period/totals.rb +30 -11
- data/lib/llm_cost_tracker/ledger/period.rb +5 -5
- data/lib/llm_cost_tracker/ledger/rollups/upsert_sql.rb +2 -2
- data/lib/llm_cost_tracker/ledger/rollups.rb +90 -25
- data/lib/llm_cost_tracker/ledger/schema/adapter.rb +18 -0
- data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +79 -0
- data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +37 -0
- data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +41 -0
- data/lib/llm_cost_tracker/ledger/schema/calls.rb +36 -23
- data/lib/llm_cost_tracker/ledger/schema/ingestion_inbox_entries.rb +47 -0
- data/lib/llm_cost_tracker/ledger/schema/ingestion_leases.rb +42 -0
- data/lib/llm_cost_tracker/ledger/schema/provider_invoice_imports.rb +46 -0
- data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +57 -0
- data/lib/llm_cost_tracker/ledger/store.rb +103 -20
- data/lib/llm_cost_tracker/ledger/tags/encoding.rb +37 -0
- data/lib/llm_cost_tracker/ledger/tags/query.rb +6 -11
- data/lib/llm_cost_tracker/ledger/tags/sql.rb +27 -15
- data/lib/llm_cost_tracker/ledger.rb +5 -2
- data/lib/llm_cost_tracker/logging.rb +2 -5
- data/lib/llm_cost_tracker/masking.rb +39 -0
- data/lib/llm_cost_tracker/middleware/faraday.rb +95 -35
- data/lib/llm_cost_tracker/parsers/anthropic.rb +74 -14
- data/lib/llm_cost_tracker/parsers/base.rb +13 -4
- data/lib/llm_cost_tracker/parsers/gemini.rb +105 -15
- data/lib/llm_cost_tracker/parsers/openai.rb +16 -2
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +15 -3
- data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +126 -0
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +157 -59
- data/lib/llm_cost_tracker/parsers/sse.rb +1 -1
- data/lib/llm_cost_tracker/parsers.rb +1 -1
- data/lib/llm_cost_tracker/prices.json +198 -22
- data/lib/llm_cost_tracker/pricing/effective_prices.rb +28 -21
- data/lib/llm_cost_tracker/pricing/explainer.rb +4 -5
- data/lib/llm_cost_tracker/pricing/lookup.rb +73 -36
- data/lib/llm_cost_tracker/pricing/mode.rb +76 -0
- data/lib/llm_cost_tracker/pricing/registry.rb +67 -45
- data/lib/llm_cost_tracker/pricing/service_charges.rb +210 -0
- data/lib/llm_cost_tracker/pricing/sync/fetcher.rb +26 -17
- data/lib/llm_cost_tracker/pricing/sync/registry_diff.rb +6 -15
- data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +50 -1
- data/lib/llm_cost_tracker/pricing/sync.rb +59 -10
- data/lib/llm_cost_tracker/pricing/sync_change_printer.rb +32 -0
- data/lib/llm_cost_tracker/pricing.rb +220 -28
- data/lib/llm_cost_tracker/railtie.rb +6 -8
- data/lib/llm_cost_tracker/reconcile_tasks.rb +134 -0
- data/lib/llm_cost_tracker/reconciliation/diff.rb +428 -0
- data/lib/llm_cost_tracker/reconciliation/diff_result.rb +48 -0
- data/lib/llm_cost_tracker/reconciliation/import_result.rb +19 -0
- data/lib/llm_cost_tracker/reconciliation/importer.rb +253 -0
- data/lib/llm_cost_tracker/reconciliation/sources/anthropic_usage.rb +171 -0
- data/lib/llm_cost_tracker/reconciliation/sources/fingerprint.rb +20 -0
- data/lib/llm_cost_tracker/reconciliation/sources/openai_usage.rb +142 -0
- data/lib/llm_cost_tracker/reconciliation.rb +118 -0
- data/lib/llm_cost_tracker/report/data.rb +19 -8
- data/lib/llm_cost_tracker/report.rb +0 -4
- data/lib/llm_cost_tracker/retention.rb +22 -9
- data/lib/llm_cost_tracker/tags/context.rb +2 -5
- data/lib/llm_cost_tracker/tags/key.rb +4 -0
- data/lib/llm_cost_tracker/tags/sanitizer.rb +71 -20
- data/lib/llm_cost_tracker/timing.rb +15 -0
- data/lib/llm_cost_tracker/token_usage.rb +64 -42
- data/lib/llm_cost_tracker/tracker.rb +97 -27
- data/lib/llm_cost_tracker/usage_capture.rb +29 -8
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +45 -35
- data/lib/tasks/llm_cost_tracker.rake +45 -17
- metadata +71 -41
- data/app/models/llm_cost_tracker/ingestion/event.rb +0 -13
- data/app/models/llm_cost_tracker/ledger/call.rb +0 -45
- data/app/models/llm_cost_tracker/ledger/call_metrics.rb +0 -66
- data/app/models/llm_cost_tracker/ledger/period/grouping.rb +0 -71
- data/app/models/llm_cost_tracker/ledger/period/total.rb +0 -13
- data/app/models/llm_cost_tracker/ledger/tags/accessors.rb +0 -19
- data/lib/llm_cost_tracker/configuration/instrumentation.rb +0 -33
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_ingestion_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_period_totals_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_token_usage_generator.rb +0 -42
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_ingestion_to_llm_cost_tracker.rb.erb +0 -33
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +0 -9
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +0 -104
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_provider_response_id_to_llm_api_calls.rb.erb +0 -15
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_streaming_to_llm_api_calls.rb.erb +0 -21
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_token_usage_to_llm_api_calls.rb.erb +0 -22
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +0 -83
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +0 -26
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +0 -44
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +0 -29
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +0 -29
- data/lib/llm_cost_tracker/ledger/rollups/batch.rb +0 -43
- data/lib/llm_cost_tracker/ledger/schema/period_totals.rb +0 -32
- data/lib/llm_cost_tracker/pricing/components.rb +0 -37
- data/lib/llm_cost_tracker/pricing/sync/registry_loader.rb +0 -63
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
|
|
4
|
+
module Masking
|
|
5
|
+
SENSITIVE_KEYS = %i[
|
|
6
|
+
provider_api_key_id provider_workspace_id provider_organization_id provider_project_id
|
|
7
|
+
].to_set.freeze
|
|
8
|
+
MASK_TAIL_LENGTH = 4
|
|
9
|
+
|
|
10
|
+
module_function
|
|
11
|
+
|
|
12
|
+
def mask_value(key, value)
|
|
13
|
+
string = value.to_s
|
|
14
|
+
return string unless SENSITIVE_KEYS.include?(key.to_sym)
|
|
15
|
+
return string if string.length <= MASK_TAIL_LENGTH
|
|
16
|
+
|
|
17
|
+
"***#{string[-MASK_TAIL_LENGTH, MASK_TAIL_LENGTH]}"
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def format_attribution(attribution, separator: ", ")
|
|
21
|
+
return "" if attribution.nil? || attribution.empty?
|
|
22
|
+
|
|
23
|
+
attribution.map { |key, value| "#{key}=#{mask_value(key, value)}" }.join(separator)
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def mask_hash(hash)
|
|
27
|
+
return hash unless hash.is_a?(Hash)
|
|
28
|
+
|
|
29
|
+
hash.each_with_object({}) do |(key, value), masked|
|
|
30
|
+
masked[key] = case value
|
|
31
|
+
when Hash then mask_hash(value)
|
|
32
|
+
when Array then value.map { |entry| entry.is_a?(Hash) ? mask_hash(entry) : entry }
|
|
33
|
+
else
|
|
34
|
+
mask_value(key, value)
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
@@ -2,10 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
require "faraday"
|
|
4
4
|
require "json"
|
|
5
|
+
require "stringio"
|
|
5
6
|
require "uri"
|
|
6
7
|
|
|
7
8
|
require_relative "../logging"
|
|
8
9
|
require_relative "../capture/stream"
|
|
10
|
+
require_relative "../timing"
|
|
9
11
|
|
|
10
12
|
module LlmCostTracker
|
|
11
13
|
module Middleware
|
|
@@ -19,31 +21,85 @@ module LlmCostTracker
|
|
|
19
21
|
return @app.call(request_env) unless LlmCostTracker.configuration.enabled
|
|
20
22
|
|
|
21
23
|
request_url = request_env.url.to_s
|
|
22
|
-
request_body = read_body(request_env.body)
|
|
24
|
+
request_body = read_body(request_env.body)
|
|
23
25
|
parser = Parsers.find_for(request_url)
|
|
24
26
|
streaming = parser&.streaming_request?(request_url, request_body)
|
|
27
|
+
request_body = inject_stream_usage_flag(request_env, parser, request_url) if streaming
|
|
25
28
|
stream_buffer = install_stream_tap(request_env) if streaming
|
|
26
29
|
|
|
27
30
|
Tracker.enforce_budget! if parser
|
|
28
31
|
context_tags, metadata = tag_snapshot(request_env) if parser
|
|
29
|
-
started_at =
|
|
32
|
+
started_at = LlmCostTracker::Timing.now_monotonic
|
|
30
33
|
|
|
34
|
+
invoke_app_with_capture(
|
|
35
|
+
request_env: request_env, parser: parser, request_url: request_url,
|
|
36
|
+
request_body: request_body, streaming: streaming, stream_buffer: stream_buffer,
|
|
37
|
+
context_tags: context_tags, metadata: metadata, started_at: started_at
|
|
38
|
+
)
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
def invoke_app_with_capture(request_env:, parser:, request_url:, request_body:, streaming:,
|
|
44
|
+
stream_buffer:, context_tags:, metadata:, started_at:)
|
|
45
|
+
response_received = false
|
|
31
46
|
@app.call(request_env).on_complete do |response_env|
|
|
47
|
+
response_received = true
|
|
32
48
|
process(
|
|
33
|
-
parser: parser,
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
49
|
+
parser: parser, request_url: request_url, request_body: request_body,
|
|
50
|
+
response_env: response_env, latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
|
|
51
|
+
streaming: streaming, stream_buffer: stream_buffer,
|
|
52
|
+
context_tags: context_tags, metadata: metadata
|
|
53
|
+
)
|
|
54
|
+
end
|
|
55
|
+
rescue StandardError => e
|
|
56
|
+
if streaming && parser && !response_received
|
|
57
|
+
process_interrupted_stream(
|
|
58
|
+
parser: parser, request_url: request_url, request_body: request_body,
|
|
59
|
+
latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
|
|
60
|
+
context_tags: context_tags, metadata: metadata, error: e
|
|
42
61
|
)
|
|
43
62
|
end
|
|
63
|
+
raise
|
|
44
64
|
end
|
|
45
65
|
|
|
46
|
-
|
|
66
|
+
def inject_stream_usage_flag(request_env, parser, request_url)
|
|
67
|
+
body_string = read_body(request_env.body)
|
|
68
|
+
return body_string unless LlmCostTracker.configuration.auto_enable_stream_usage
|
|
69
|
+
return body_string unless parser&.auto_enable_stream_usage?(request_url)
|
|
70
|
+
|
|
71
|
+
body = JSON.parse(body_string)
|
|
72
|
+
return body_string if body["stream_options"].is_a?(Hash) && body["stream_options"].key?("include_usage")
|
|
73
|
+
|
|
74
|
+
body["stream_options"] = (body["stream_options"] || {}).merge("include_usage" => true)
|
|
75
|
+
new_body = body.to_json
|
|
76
|
+
request_env.body = new_body
|
|
77
|
+
new_body
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def process_interrupted_stream(parser:, request_url:, request_body:, latency_ms:,
|
|
81
|
+
context_tags:, metadata:, error:)
|
|
82
|
+
request = parser.safe_json_parse(request_body)
|
|
83
|
+
capture = UsageCapture.build(
|
|
84
|
+
provider: parser.provider_for(request_url),
|
|
85
|
+
model: request["model"] || UsageCapture::UNKNOWN_MODEL,
|
|
86
|
+
token_usage: TokenUsage.build(input_tokens: 0, output_tokens: 0, total_tokens: 0),
|
|
87
|
+
stream: true,
|
|
88
|
+
usage_source: :unknown
|
|
89
|
+
)
|
|
90
|
+
merged_metadata = (metadata || {}).merge(
|
|
91
|
+
stream_interrupted: true,
|
|
92
|
+
stream_interrupted_error: "#{error.class}: #{error.message}"
|
|
93
|
+
)
|
|
94
|
+
Tracker.record(
|
|
95
|
+
capture: capture,
|
|
96
|
+
latency_ms: latency_ms,
|
|
97
|
+
metadata: merged_metadata,
|
|
98
|
+
context_tags: context_tags
|
|
99
|
+
)
|
|
100
|
+
rescue StandardError => e
|
|
101
|
+
Logging.warn("Error recording interrupted stream: #{e.class}: #{e.message}")
|
|
102
|
+
end
|
|
47
103
|
|
|
48
104
|
def process(parser:, request_url:, request_body:, response_env:,
|
|
49
105
|
latency_ms:, streaming:, stream_buffer:, context_tags:, metadata:)
|
|
@@ -101,21 +157,14 @@ module LlmCostTracker
|
|
|
101
157
|
end
|
|
102
158
|
|
|
103
159
|
def parse_stream(parser:, request_url:, request_body:, response_env:, stream_buffer:)
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
return parser.parse_stream(
|
|
107
|
-
request_url: request_url,
|
|
108
|
-
request_body: request_body,
|
|
109
|
-
response_status: response_env.status,
|
|
110
|
-
response_headers: response_env.response_headers
|
|
111
|
-
)
|
|
112
|
-
end
|
|
160
|
+
overflowed = stream_buffer&.dig(:overflowed) == true
|
|
161
|
+
Logging.warn(capture_warning(request_url, stream_buffer)) if overflowed
|
|
113
162
|
|
|
114
163
|
body = stream_buffer&.dig(:buffer)&.string
|
|
115
164
|
body = read_body(response_env.body) if body.blank?
|
|
116
165
|
|
|
117
166
|
if body.blank?
|
|
118
|
-
Logging.warn(capture_warning(request_url, stream_buffer))
|
|
167
|
+
Logging.warn(capture_warning(request_url, stream_buffer)) unless overflowed
|
|
119
168
|
return parser.parse_stream(
|
|
120
169
|
request_url: request_url,
|
|
121
170
|
request_body: request_body,
|
|
@@ -124,7 +173,7 @@ module LlmCostTracker
|
|
|
124
173
|
)
|
|
125
174
|
end
|
|
126
175
|
|
|
127
|
-
events = Parsers::SSE.parse(body)
|
|
176
|
+
events = overflowed ? [] : Parsers::SSE.parse(body)
|
|
128
177
|
parser.parse_stream(
|
|
129
178
|
request_url: request_url,
|
|
130
179
|
request_body: request_body,
|
|
@@ -134,8 +183,19 @@ module LlmCostTracker
|
|
|
134
183
|
)
|
|
135
184
|
end
|
|
136
185
|
|
|
186
|
+
def forward_on_data_chunk(callable, chunk, size, env)
|
|
187
|
+
arity = callable.arity
|
|
188
|
+
return callable.call(chunk, size, env) if arity.negative?
|
|
189
|
+
|
|
190
|
+
case arity
|
|
191
|
+
when 0, 1 then callable.call(chunk)
|
|
192
|
+
when 2 then callable.call(chunk, size)
|
|
193
|
+
else callable.call(chunk, size, env)
|
|
194
|
+
end
|
|
195
|
+
end
|
|
196
|
+
|
|
137
197
|
def install_stream_tap(request_env)
|
|
138
|
-
request = request_env.
|
|
198
|
+
request = request_env.request
|
|
139
199
|
return nil unless request
|
|
140
200
|
|
|
141
201
|
original = request.on_data
|
|
@@ -144,16 +204,16 @@ module LlmCostTracker
|
|
|
144
204
|
state = { buffer: StringIO.new, bytes: 0, overflowed: false }
|
|
145
205
|
request.on_data = proc do |chunk, size, env|
|
|
146
206
|
chunk = chunk.to_s
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
207
|
+
remaining = Capture::Stream::LIMIT_BYTES - state[:bytes]
|
|
208
|
+
if chunk.bytesize <= remaining
|
|
209
|
+
state[:buffer] << chunk
|
|
210
|
+
state[:bytes] += chunk.bytesize
|
|
211
|
+
else
|
|
212
|
+
state[:buffer] << chunk.byteslice(0, remaining) if remaining.positive?
|
|
213
|
+
state[:bytes] += [remaining, 0].max
|
|
214
|
+
state[:overflowed] = true
|
|
155
215
|
end
|
|
156
|
-
original
|
|
216
|
+
forward_on_data_chunk(original, chunk, size, env)
|
|
157
217
|
end
|
|
158
218
|
state
|
|
159
219
|
rescue StandardError => e
|
|
@@ -204,8 +264,8 @@ module LlmCostTracker
|
|
|
204
264
|
uri = URI.parse(value.to_s)
|
|
205
265
|
uri.query = nil
|
|
206
266
|
uri.fragment = nil
|
|
207
|
-
uri.
|
|
208
|
-
uri.
|
|
267
|
+
uri.user = nil
|
|
268
|
+
uri.password = nil
|
|
209
269
|
uri.to_s
|
|
210
270
|
rescue URI::InvalidURIError
|
|
211
271
|
value.to_s.split("?", 2).first
|
|
@@ -31,7 +31,8 @@ module LlmCostTracker
|
|
|
31
31
|
pricing_mode: pricing_mode(request: request, response: response, usage: usage),
|
|
32
32
|
model: response["model"] || request["model"],
|
|
33
33
|
token_usage: token_usage(usage: usage, cache_read: cache_read),
|
|
34
|
-
usage_source: :response
|
|
34
|
+
usage_source: :response,
|
|
35
|
+
service_line_items: service_line_items(usage)
|
|
35
36
|
)
|
|
36
37
|
end
|
|
37
38
|
|
|
@@ -60,20 +61,28 @@ module LlmCostTracker
|
|
|
60
61
|
end
|
|
61
62
|
end
|
|
62
63
|
|
|
64
|
+
def provider_for(_request_url)
|
|
65
|
+
"anthropic"
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
DATA_RESIDENCY_GEOS = %w[us].freeze
|
|
69
|
+
STANDARD_EQUIVALENT_SERVICE_TIERS = %w[standard standard_only priority].freeze
|
|
70
|
+
private_constant :DATA_RESIDENCY_GEOS, :STANDARD_EQUIVALENT_SERVICE_TIERS
|
|
71
|
+
|
|
63
72
|
private
|
|
64
73
|
|
|
65
74
|
def stream_usage(events)
|
|
66
|
-
start_usage = find_event_value(events, reverse: true) do |data|
|
|
67
|
-
data.dig("message", "usage") if data["type"] == "message_start"
|
|
68
|
-
end
|
|
69
75
|
latest_delta = find_event_value(events, reverse: true) do |data|
|
|
70
76
|
data["usage"] if data["type"] == "message_delta" && data["usage"].is_a?(Hash)
|
|
71
77
|
end
|
|
78
|
+
return nil unless latest_delta
|
|
72
79
|
|
|
73
|
-
|
|
80
|
+
start_usage = find_event_value(events, reverse: true) do |data|
|
|
81
|
+
data.dig("message", "usage") if data["type"] == "message_start"
|
|
82
|
+
end
|
|
74
83
|
|
|
75
|
-
(start_usage || {}).merge(latest_delta
|
|
76
|
-
delta_val
|
|
84
|
+
(start_usage || {}).merge(latest_delta) do |_key, start_val, delta_val|
|
|
85
|
+
delta_val || start_val
|
|
77
86
|
end
|
|
78
87
|
end
|
|
79
88
|
|
|
@@ -87,7 +96,44 @@ module LlmCostTracker
|
|
|
87
96
|
model: model,
|
|
88
97
|
token_usage: token_usage(usage: usage, cache_read: cache_read),
|
|
89
98
|
stream: true,
|
|
90
|
-
usage_source: :stream_final
|
|
99
|
+
usage_source: :stream_final,
|
|
100
|
+
service_line_items: service_line_items(usage)
|
|
101
|
+
)
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def service_line_items(usage)
|
|
105
|
+
server_tool_use = usage["server_tool_use"]
|
|
106
|
+
return [] unless server_tool_use.is_a?(Hash)
|
|
107
|
+
|
|
108
|
+
[
|
|
109
|
+
service_line_item(
|
|
110
|
+
component_key: :web_search_request,
|
|
111
|
+
quantity: server_tool_use["web_search_requests"],
|
|
112
|
+
provider_field: "usage.server_tool_use.web_search_requests"
|
|
113
|
+
),
|
|
114
|
+
service_line_item(
|
|
115
|
+
component_key: :web_fetch_request,
|
|
116
|
+
quantity: server_tool_use["web_fetch_requests"],
|
|
117
|
+
provider_field: "usage.server_tool_use.web_fetch_requests"
|
|
118
|
+
),
|
|
119
|
+
service_line_item(
|
|
120
|
+
component_key: :code_execution_request,
|
|
121
|
+
quantity: server_tool_use["code_execution_requests"],
|
|
122
|
+
provider_field: "usage.server_tool_use.code_execution_requests"
|
|
123
|
+
)
|
|
124
|
+
].compact
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
def service_line_item(component_key:, quantity:, provider_field:)
|
|
128
|
+
quantity = quantity.to_i
|
|
129
|
+
return if quantity.zero?
|
|
130
|
+
|
|
131
|
+
Billing::LineItem.build(
|
|
132
|
+
component_key: component_key,
|
|
133
|
+
quantity: quantity,
|
|
134
|
+
cost_status: Billing::CostStatus::UNKNOWN,
|
|
135
|
+
pricing_basis: :provider_usage,
|
|
136
|
+
provider_field: provider_field
|
|
91
137
|
)
|
|
92
138
|
end
|
|
93
139
|
|
|
@@ -97,32 +143,46 @@ module LlmCostTracker
|
|
|
97
143
|
cache_creation = usage["cache_creation"]
|
|
98
144
|
if cache_creation.is_a?(Hash)
|
|
99
145
|
cache_write = cache_creation["ephemeral_5m_input_tokens"].to_i
|
|
100
|
-
|
|
146
|
+
cache_write_extended = cache_creation["ephemeral_1h_input_tokens"].to_i
|
|
101
147
|
else
|
|
148
|
+
warn_unexpected_cache_creation(cache_creation, usage)
|
|
102
149
|
cache_write = usage["cache_creation_input_tokens"].to_i
|
|
103
|
-
|
|
150
|
+
cache_write_extended = 0
|
|
104
151
|
end
|
|
152
|
+
hidden_output = (
|
|
153
|
+
usage["thinking_tokens"] || usage["thinking_output_tokens"] ||
|
|
154
|
+
usage.dig("output_tokens_details", "reasoning_tokens")
|
|
155
|
+
).to_i
|
|
105
156
|
|
|
106
157
|
TokenUsage.build(
|
|
107
158
|
input_tokens: input,
|
|
108
159
|
output_tokens: output,
|
|
109
|
-
total_tokens: input + output + cache_read + cache_write +
|
|
110
|
-
cache_read_input_tokens:
|
|
160
|
+
total_tokens: input + output + cache_read + cache_write + cache_write_extended,
|
|
161
|
+
cache_read_input_tokens: cache_read,
|
|
111
162
|
cache_write_input_tokens: cache_write,
|
|
112
|
-
|
|
163
|
+
cache_write_extended_input_tokens: cache_write_extended,
|
|
164
|
+
hidden_output_tokens: hidden_output
|
|
113
165
|
)
|
|
114
166
|
end
|
|
115
167
|
|
|
168
|
+
def warn_unexpected_cache_creation(cache_creation, usage)
|
|
169
|
+
return if cache_creation.nil? || usage.key?("cache_creation_input_tokens")
|
|
170
|
+
|
|
171
|
+
Logging.warn("Anthropic usage.cache_creation has unexpected shape: #{cache_creation.class}")
|
|
172
|
+
end
|
|
173
|
+
|
|
116
174
|
def pricing_mode(request:, response:, usage:)
|
|
117
175
|
modes = []
|
|
118
176
|
speed = usage&.fetch("speed", nil) || response&.fetch("speed", nil) || request["speed"]
|
|
119
177
|
service_tier = usage&.fetch("service_tier", nil) ||
|
|
120
178
|
response&.fetch("service_tier", nil) ||
|
|
121
179
|
request["service_tier"]
|
|
180
|
+
service_tier = nil if STANDARD_EQUIVALENT_SERVICE_TIERS.include?(service_tier.to_s)
|
|
122
181
|
|
|
123
182
|
modes << Pricing.normalize_mode(speed)
|
|
124
183
|
modes << Pricing.normalize_mode(service_tier)
|
|
125
|
-
|
|
184
|
+
geo = inference_geo(request: request, response: response, usage: usage).downcase
|
|
185
|
+
modes << "data_residency" if DATA_RESIDENCY_GEOS.include?(geo)
|
|
126
186
|
|
|
127
187
|
modes = modes.compact.uniq
|
|
128
188
|
modes.empty? ? nil : modes.join("_")
|
|
@@ -32,7 +32,9 @@ module LlmCostTracker
|
|
|
32
32
|
nil
|
|
33
33
|
end
|
|
34
34
|
|
|
35
|
-
|
|
35
|
+
def auto_enable_stream_usage?(_request_url)
|
|
36
|
+
false
|
|
37
|
+
end
|
|
36
38
|
|
|
37
39
|
def safe_json_parse(body)
|
|
38
40
|
return {} if body.blank?
|
|
@@ -42,6 +44,8 @@ module LlmCostTracker
|
|
|
42
44
|
{}
|
|
43
45
|
end
|
|
44
46
|
|
|
47
|
+
private
|
|
48
|
+
|
|
45
49
|
def uri_matches?(url)
|
|
46
50
|
uri = parsed_uri(url)
|
|
47
51
|
uri ? yield(uri) : false
|
|
@@ -59,7 +63,10 @@ module LlmCostTracker
|
|
|
59
63
|
)
|
|
60
64
|
extra_match = block_given? ? yield(uri) : true
|
|
61
65
|
|
|
62
|
-
host_match && path_match
|
|
66
|
+
next false unless host_match && path_match
|
|
67
|
+
next false unless extra_match
|
|
68
|
+
|
|
69
|
+
true
|
|
63
70
|
end
|
|
64
71
|
end
|
|
65
72
|
|
|
@@ -100,7 +107,8 @@ module LlmCostTracker
|
|
|
100
107
|
nil
|
|
101
108
|
end
|
|
102
109
|
|
|
103
|
-
def build_unknown_stream_usage(provider:, model:, provider_response_id:, pricing_mode: nil
|
|
110
|
+
def build_unknown_stream_usage(provider:, model:, provider_response_id:, pricing_mode: nil,
|
|
111
|
+
service_line_items: nil)
|
|
104
112
|
UsageCapture.build(
|
|
105
113
|
provider: provider,
|
|
106
114
|
provider_response_id: provider_response_id,
|
|
@@ -108,7 +116,8 @@ module LlmCostTracker
|
|
|
108
116
|
model: model || UsageCapture::UNKNOWN_MODEL,
|
|
109
117
|
token_usage: TokenUsage.build(input_tokens: 0, output_tokens: 0, total_tokens: 0),
|
|
110
118
|
stream: true,
|
|
111
|
-
usage_source: :unknown
|
|
119
|
+
usage_source: :unknown,
|
|
120
|
+
service_line_items: service_line_items
|
|
112
121
|
)
|
|
113
122
|
end
|
|
114
123
|
end
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "../billing/line_item"
|
|
3
4
|
require_relative "base"
|
|
4
5
|
|
|
5
6
|
module LlmCostTracker
|
|
@@ -8,6 +9,7 @@ module LlmCostTracker
|
|
|
8
9
|
HOSTS = %w[generativelanguage.googleapis.com].freeze
|
|
9
10
|
TRACKED_PATH_PATTERN = %r{/models/[^/:]+:(?:generateContent|streamGenerateContent)\z}
|
|
10
11
|
STREAM_PATH_PATTERN = /:streamGenerateContent\z/
|
|
12
|
+
PER_QUERY_GROUNDING_MODEL_PATTERN = /\bgemini-(?:[3-9]|[1-9]\d)\b/i
|
|
11
13
|
|
|
12
14
|
def match?(url)
|
|
13
15
|
match_uri?(url, hosts: HOSTS, path_pattern: TRACKED_PATH_PATTERN)
|
|
@@ -31,12 +33,14 @@ module LlmCostTracker
|
|
|
31
33
|
return nil unless usage
|
|
32
34
|
|
|
33
35
|
request = safe_json_parse(request_body)
|
|
36
|
+
model = extract_model_from_url(request_url)
|
|
34
37
|
build_usage_capture(
|
|
35
38
|
request_url: request_url,
|
|
36
39
|
usage: usage,
|
|
37
40
|
usage_source: :response,
|
|
38
41
|
provider_response_id: response["responseId"],
|
|
39
|
-
pricing_mode: pricing_mode(request: request, response_headers: response_headers)
|
|
42
|
+
pricing_mode: pricing_mode(request: request, response_headers: response_headers),
|
|
43
|
+
service_line_items: grounding_line_items_for_response(response, model: model)
|
|
40
44
|
)
|
|
41
45
|
end
|
|
42
46
|
|
|
@@ -48,6 +52,7 @@ module LlmCostTracker
|
|
|
48
52
|
model = extract_model_from_url(request_url)
|
|
49
53
|
response_id = stream_response_id(events)
|
|
50
54
|
mode = pricing_mode(request: request, response_headers: response_headers)
|
|
55
|
+
service_line_items = grounding_line_items_for_stream(events, model: model)
|
|
51
56
|
|
|
52
57
|
if usage
|
|
53
58
|
build_usage_capture(
|
|
@@ -56,39 +61,51 @@ module LlmCostTracker
|
|
|
56
61
|
stream: true,
|
|
57
62
|
usage_source: :stream_final,
|
|
58
63
|
provider_response_id: response_id,
|
|
59
|
-
pricing_mode: mode
|
|
64
|
+
pricing_mode: mode,
|
|
65
|
+
service_line_items: service_line_items
|
|
60
66
|
)
|
|
61
67
|
else
|
|
62
68
|
build_unknown_stream_usage(
|
|
63
69
|
provider: "gemini",
|
|
64
70
|
model: model,
|
|
65
71
|
provider_response_id: response_id,
|
|
66
|
-
pricing_mode: mode
|
|
72
|
+
pricing_mode: mode,
|
|
73
|
+
service_line_items: service_line_items
|
|
67
74
|
)
|
|
68
75
|
end
|
|
69
76
|
end
|
|
70
77
|
|
|
78
|
+
def provider_for(_request_url)
|
|
79
|
+
"gemini"
|
|
80
|
+
end
|
|
81
|
+
|
|
71
82
|
private
|
|
72
83
|
|
|
73
84
|
def build_usage_capture(request_url:, usage:, usage_source:, stream: false, provider_response_id: nil,
|
|
74
|
-
pricing_mode: nil)
|
|
85
|
+
pricing_mode: nil, service_line_items: nil)
|
|
75
86
|
cache_read = usage["cachedContentTokenCount"].to_i
|
|
76
87
|
tool_use_prompt = usage["toolUsePromptTokenCount"].to_i
|
|
88
|
+
audio_input = audio_input_tokens(usage)
|
|
89
|
+
audio_output = audio_output_tokens(usage)
|
|
77
90
|
|
|
78
91
|
UsageCapture.build(
|
|
79
92
|
provider: "gemini",
|
|
80
93
|
model: extract_model_from_url(request_url),
|
|
81
94
|
pricing_mode: pricing_mode,
|
|
82
95
|
token_usage: TokenUsage.build(
|
|
83
|
-
input_tokens:
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
96
|
+
input_tokens: regular_input_tokens(usage: usage, cache_read: cache_read, audio_input: audio_input) +
|
|
97
|
+
tool_use_prompt,
|
|
98
|
+
output_tokens: regular_output_tokens(usage: usage, audio_output: audio_output),
|
|
99
|
+
total_tokens: usage["totalTokenCount"],
|
|
100
|
+
cache_read_input_tokens: cache_read,
|
|
101
|
+
audio_input_tokens: audio_input,
|
|
102
|
+
audio_output_tokens: audio_output,
|
|
87
103
|
hidden_output_tokens: usage["thoughtsTokenCount"]
|
|
88
104
|
),
|
|
89
105
|
stream: stream,
|
|
90
106
|
usage_source: usage_source,
|
|
91
|
-
provider_response_id: provider_response_id
|
|
107
|
+
provider_response_id: provider_response_id,
|
|
108
|
+
service_line_items: service_line_items
|
|
92
109
|
)
|
|
93
110
|
end
|
|
94
111
|
|
|
@@ -100,14 +117,41 @@ module LlmCostTracker
|
|
|
100
117
|
end
|
|
101
118
|
|
|
102
119
|
def output_tokens(usage)
|
|
103
|
-
usage["candidatesTokenCount"].to_i
|
|
120
|
+
(usage["candidatesTokenCount"] || usage["responseTokenCount"]).to_i + usage["thoughtsTokenCount"].to_i
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def regular_input_tokens(usage:, cache_read:, audio_input:)
|
|
124
|
+
[usage["promptTokenCount"].to_i - cache_read - audio_input, 0].max
|
|
104
125
|
end
|
|
105
126
|
|
|
106
|
-
def
|
|
107
|
-
|
|
108
|
-
|
|
127
|
+
def regular_output_tokens(usage:, audio_output:)
|
|
128
|
+
[output_tokens(usage) - audio_output, 0].max
|
|
129
|
+
end
|
|
109
130
|
|
|
110
|
-
|
|
131
|
+
def audio_input_tokens(usage)
|
|
132
|
+
prompt_audio = modality_tokens(usage["promptTokensDetails"] || usage["prompt_tokens_details"], "AUDIO")
|
|
133
|
+
cache_audio = modality_tokens(usage["cacheTokensDetails"] || usage["cache_tokens_details"], "AUDIO")
|
|
134
|
+
[prompt_audio - cache_audio, 0].max
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
def audio_output_tokens(usage)
|
|
138
|
+
modality_tokens(
|
|
139
|
+
usage["candidatesTokensDetails"] ||
|
|
140
|
+
usage["candidates_tokens_details"] ||
|
|
141
|
+
usage["responseTokensDetails"] ||
|
|
142
|
+
usage["response_tokens_details"],
|
|
143
|
+
"AUDIO"
|
|
144
|
+
)
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
def modality_tokens(details, modality)
|
|
148
|
+
Array(details).sum do |detail|
|
|
149
|
+
next 0 unless detail.is_a?(Hash)
|
|
150
|
+
|
|
151
|
+
next 0 unless detail["modality"] == modality
|
|
152
|
+
|
|
153
|
+
(detail["tokenCount"] || detail["token_count"]).to_i
|
|
154
|
+
end
|
|
111
155
|
end
|
|
112
156
|
|
|
113
157
|
def stream_response_id(events)
|
|
@@ -133,12 +177,58 @@ module LlmCostTracker
|
|
|
133
177
|
request.dig("config", "service_tier") ||
|
|
134
178
|
request.dig("config", "serviceTier")
|
|
135
179
|
)
|
|
136
|
-
request_mode ==
|
|
180
|
+
request_mode == :flex ? request_mode : nil
|
|
137
181
|
end
|
|
138
182
|
|
|
139
183
|
def response_header(headers, name)
|
|
140
184
|
headers.to_h.find { |key, _value| key.to_s.downcase == name }&.last
|
|
141
185
|
end
|
|
186
|
+
|
|
187
|
+
def grounding_line_items_for_response(response, model:)
|
|
188
|
+
grounding_line_items(grounding_request_count(response["candidates"]), model: model)
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
def grounding_line_items_for_stream(events, model:)
|
|
192
|
+
quantity = find_event_value(events, reverse: true) do |data|
|
|
193
|
+
count = grounding_request_count(data["candidates"])
|
|
194
|
+
count if count.positive?
|
|
195
|
+
end
|
|
196
|
+
grounding_line_items(quantity || 0, model: model)
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
def grounding_request_count(candidates)
|
|
200
|
+
Array(candidates).sum do |candidate|
|
|
201
|
+
next 0 unless candidate.is_a?(Hash)
|
|
202
|
+
|
|
203
|
+
metadata = candidate["groundingMetadata"] || candidate["grounding_metadata"] || {}
|
|
204
|
+
queries = metadata["webSearchQueries"] || metadata["web_search_queries"] || []
|
|
205
|
+
Array(queries).size
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
|
|
209
|
+
def grounding_line_items(query_count, model:)
|
|
210
|
+
return [] unless query_count.positive?
|
|
211
|
+
|
|
212
|
+
billed_quantity = grounding_billed_quantity(query_count, model: model)
|
|
213
|
+
[
|
|
214
|
+
Billing::LineItem.build(
|
|
215
|
+
component_key: :grounding_request,
|
|
216
|
+
quantity: billed_quantity,
|
|
217
|
+
cost_status: Billing::CostStatus::UNKNOWN,
|
|
218
|
+
pricing_basis: :provider_usage,
|
|
219
|
+
provider_field: "response.candidates.groundingMetadata.webSearchQueries",
|
|
220
|
+
details: { web_search_queries: query_count }
|
|
221
|
+
)
|
|
222
|
+
]
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
def grounding_billed_quantity(query_count, model:)
|
|
226
|
+
per_query_billing?(model) ? query_count : 1
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
def per_query_billing?(model)
|
|
230
|
+
model.to_s.match?(PER_QUERY_GROUNDING_MODEL_PATTERN)
|
|
231
|
+
end
|
|
142
232
|
end
|
|
143
233
|
end
|
|
144
234
|
end
|
|
@@ -21,7 +21,19 @@ module LlmCostTracker
|
|
|
21
21
|
gb.api.openai.com
|
|
22
22
|
ae.api.openai.com
|
|
23
23
|
].freeze
|
|
24
|
-
TRACKED_PATHS = %w[
|
|
24
|
+
TRACKED_PATHS = %w[
|
|
25
|
+
/v1/chat/completions
|
|
26
|
+
/v1/completions
|
|
27
|
+
/v1/embeddings
|
|
28
|
+
/v1/responses
|
|
29
|
+
/v1/images/generations
|
|
30
|
+
/v1/images/edits
|
|
31
|
+
/v1/images/variations
|
|
32
|
+
/v1/audio/transcriptions
|
|
33
|
+
/v1/audio/translations
|
|
34
|
+
/v1/audio/speech
|
|
35
|
+
/v1/moderations
|
|
36
|
+
].freeze
|
|
25
37
|
|
|
26
38
|
def match?(url)
|
|
27
39
|
match_uri?(url, hosts: HOSTS, exact_paths: TRACKED_PATHS)
|
|
@@ -49,7 +61,9 @@ module LlmCostTracker
|
|
|
49
61
|
)
|
|
50
62
|
end
|
|
51
63
|
|
|
52
|
-
|
|
64
|
+
def auto_enable_stream_usage?(request_url)
|
|
65
|
+
openai_chat_completions_url?(request_url)
|
|
66
|
+
end
|
|
53
67
|
|
|
54
68
|
def provider_for(_request_url)
|
|
55
69
|
"openai"
|