llm_cost_tracker 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +29 -1
- data/README.md +2 -1
- data/app/controllers/llm_cost_tracker/application_controller.rb +1 -1
- data/app/controllers/llm_cost_tracker/calls_controller.rb +16 -4
- data/app/helpers/llm_cost_tracker/application_helper.rb +1 -1
- data/app/models/llm_cost_tracker/provider_invoice_import.rb +9 -4
- data/app/services/llm_cost_tracker/dashboard/setup_state.rb +110 -0
- data/app/views/llm_cost_tracker/calls/show.html.erb +1 -1
- data/app/views/llm_cost_tracker/data_quality/index.html.erb +1 -1
- data/lib/llm_cost_tracker/billing/cost_status.rb +21 -25
- data/lib/llm_cost_tracker/billing/line_item.rb +15 -49
- data/lib/llm_cost_tracker/budget.rb +28 -6
- data/lib/llm_cost_tracker/capture/stream_collector.rb +35 -29
- data/lib/llm_cost_tracker/capture/stream_tracker.rb +1 -1
- data/lib/llm_cost_tracker/configuration.rb +31 -28
- data/lib/llm_cost_tracker/doctor/capture_verifier.rb +1 -1
- data/lib/llm_cost_tracker/doctor/ingestion_check.rb +8 -8
- data/lib/llm_cost_tracker/doctor/legacy_audit_check.rb +0 -2
- data/lib/llm_cost_tracker/doctor/legacy_billing_status_check.rb +0 -2
- data/lib/llm_cost_tracker/doctor.rb +6 -17
- data/lib/llm_cost_tracker/engine.rb +1 -2
- data/lib/llm_cost_tracker/errors.rb +3 -2
- data/lib/llm_cost_tracker/event.rb +47 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/{durable_ingestion_generator.rb → async_ingestion_generator.rb} +8 -8
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +4 -23
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/{create_llm_cost_tracker_durable_ingestion.rb.erb → create_llm_cost_tracker_async_ingestion.rb.erb} +3 -3
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_reconciliation.rb.erb +6 -1
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +14 -7
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_rollups_provider.rb.erb +23 -8
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_tags_key_value_index.rb.erb +5 -5
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_provider_invoice_imports_provider.rb.erb +32 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_provider_invoices_metadata_index.rb.erb +25 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_rollups_provider_generator.rb +0 -9
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_provider_invoice_imports_provider_generator.rb +31 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_provider_invoices_metadata_index_generator.rb +31 -0
- data/lib/llm_cost_tracker/ingestion/batch.rb +5 -2
- data/lib/llm_cost_tracker/ingestion/inbox.rb +3 -24
- data/lib/llm_cost_tracker/ingestion/pool.rb +44 -0
- data/lib/llm_cost_tracker/ingestion/worker.rb +22 -36
- data/lib/llm_cost_tracker/ingestion.rb +8 -9
- data/lib/llm_cost_tracker/integrations/anthropic.rb +28 -42
- data/lib/llm_cost_tracker/integrations/base.rb +14 -11
- data/lib/llm_cost_tracker/integrations/openai.rb +93 -66
- data/lib/llm_cost_tracker/integrations/ruby_llm.rb +18 -20
- data/lib/llm_cost_tracker/integrations.rb +14 -13
- data/lib/llm_cost_tracker/ledger/period/totals.rb +5 -3
- data/lib/llm_cost_tracker/ledger/rollups.rb +4 -13
- data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +11 -0
- data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +13 -3
- data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +11 -0
- data/lib/llm_cost_tracker/ledger/schema/calls.rb +0 -4
- data/lib/llm_cost_tracker/ledger/schema/ingestion_inbox_entries.rb +13 -3
- data/lib/llm_cost_tracker/ledger/schema/ingestion_leases.rb +13 -3
- data/lib/llm_cost_tracker/ledger/schema/provider_invoice_imports.rb +19 -9
- data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +26 -11
- data/lib/llm_cost_tracker/ledger/store.rb +21 -18
- data/lib/llm_cost_tracker/ledger/tags/query.rb +0 -1
- data/lib/llm_cost_tracker/logging.rb +0 -4
- data/lib/llm_cost_tracker/middleware/faraday.rb +44 -16
- data/lib/llm_cost_tracker/parsers/anthropic.rb +21 -28
- data/lib/llm_cost_tracker/parsers/azure.rb +46 -0
- data/lib/llm_cost_tracker/parsers/base.rb +53 -47
- data/lib/llm_cost_tracker/parsers/gemini.rb +20 -22
- data/lib/llm_cost_tracker/parsers/openai.rb +8 -40
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +26 -43
- data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +45 -16
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +16 -20
- data/lib/llm_cost_tracker/parsers.rb +31 -4
- data/lib/llm_cost_tracker/prices.json +567 -579
- data/lib/llm_cost_tracker/pricing/backfill.rb +140 -0
- data/lib/llm_cost_tracker/pricing/effective_prices.rb +2 -4
- data/lib/llm_cost_tracker/pricing/estimator.rb +33 -0
- data/lib/llm_cost_tracker/pricing/explainer.rb +4 -1
- data/lib/llm_cost_tracker/pricing/lookup.rb +37 -2
- data/lib/llm_cost_tracker/pricing/registry.rb +0 -7
- data/lib/llm_cost_tracker/pricing/service_charges.rb +5 -9
- data/lib/llm_cost_tracker/pricing/{sync_change_printer.rb → sync/change_printer.rb} +3 -3
- data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +14 -2
- data/lib/llm_cost_tracker/pricing/sync.rb +1 -9
- data/lib/llm_cost_tracker/pricing/unknown.rb +5 -2
- data/lib/llm_cost_tracker/pricing.rb +72 -27
- data/lib/llm_cost_tracker/providers/anthropic/tier_classification.rb +22 -0
- data/lib/llm_cost_tracker/providers/azure/hosts.rb +17 -0
- data/lib/llm_cost_tracker/providers/gemini/model_families.rb +17 -0
- data/lib/llm_cost_tracker/providers/openai/hosts.rb +35 -0
- data/lib/llm_cost_tracker/providers/openai/model_families.rb +51 -0
- data/lib/llm_cost_tracker/railtie.rb +3 -1
- data/lib/llm_cost_tracker/reconciliation/diff.rb +26 -45
- data/lib/llm_cost_tracker/reconciliation/diff_result.rb +0 -4
- data/lib/llm_cost_tracker/reconciliation/importer.rb +1 -0
- data/lib/llm_cost_tracker/reconciliation/sources/anthropic_usage.rb +4 -3
- data/lib/llm_cost_tracker/report.rb +0 -4
- data/lib/llm_cost_tracker/retention.rb +20 -8
- data/lib/llm_cost_tracker/tags/sanitizer.rb +13 -17
- data/lib/llm_cost_tracker/token_usage.rb +4 -0
- data/lib/llm_cost_tracker/tracker.rb +33 -74
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +11 -15
- data/lib/tasks/llm_cost_tracker.rake +16 -2
- metadata +18 -7
- data/lib/llm_cost_tracker/dashboard_setup_state.rb +0 -109
- data/lib/llm_cost_tracker/ingestion/inline.rb +0 -22
- data/lib/llm_cost_tracker/usage_capture.rb +0 -58
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "base"
|
|
4
4
|
require_relative "../billing/line_item"
|
|
5
|
+
require_relative "../providers/anthropic/tier_classification"
|
|
5
6
|
|
|
6
7
|
module LlmCostTracker
|
|
7
8
|
module Integrations
|
|
@@ -45,10 +46,10 @@ module LlmCostTracker
|
|
|
45
46
|
next if input_tokens.nil? && output_tokens.nil?
|
|
46
47
|
|
|
47
48
|
LlmCostTracker::Tracker.record(
|
|
48
|
-
|
|
49
|
+
event: Event.build(
|
|
49
50
|
provider: "anthropic",
|
|
50
51
|
model: object_value(message, :model) || request[:model],
|
|
51
|
-
pricing_mode: pricing_mode(
|
|
52
|
+
pricing_mode: pricing_mode(request: request, usage: usage),
|
|
52
53
|
token_usage: token_usage(usage: usage, input_tokens: input_tokens, output_tokens: output_tokens),
|
|
53
54
|
usage_source: :sdk_response,
|
|
54
55
|
provider_response_id: object_value(message, :id),
|
|
@@ -74,7 +75,7 @@ module LlmCostTracker
|
|
|
74
75
|
end
|
|
75
76
|
|
|
76
77
|
def line_item_for_server_tool(server_tool_use, component_key, count_key, provider_field)
|
|
77
|
-
quantity =
|
|
78
|
+
quantity = object_value(server_tool_use, count_key).to_i
|
|
78
79
|
return nil if quantity.zero?
|
|
79
80
|
|
|
80
81
|
Billing::LineItem.build(
|
|
@@ -86,14 +87,6 @@ module LlmCostTracker
|
|
|
86
87
|
)
|
|
87
88
|
end
|
|
88
89
|
|
|
89
|
-
def server_tool_count(server_tool_use, count_key)
|
|
90
|
-
direct = object_value(server_tool_use, count_key).to_i
|
|
91
|
-
return direct if direct.positive?
|
|
92
|
-
return 0 unless server_tool_use.respond_to?(:to_h)
|
|
93
|
-
|
|
94
|
-
server_tool_use.to_h[count_key].to_i
|
|
95
|
-
end
|
|
96
|
-
|
|
97
90
|
def token_usage(usage:, input_tokens:, output_tokens:)
|
|
98
91
|
cache_creation = object_value(usage, :cache_creation)
|
|
99
92
|
if cache_creation
|
|
@@ -118,65 +111,58 @@ module LlmCostTracker
|
|
|
118
111
|
)
|
|
119
112
|
end
|
|
120
113
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
def pricing_mode(message:, request:, usage:)
|
|
127
|
-
service_tier = object_value(usage, :service_tier) ||
|
|
128
|
-
object_value(message, :service_tier) ||
|
|
129
|
-
request[:service_tier]
|
|
130
|
-
service_tier = nil if STANDARD_EQUIVALENT_SERVICE_TIERS.include?(service_tier.to_s)
|
|
114
|
+
def pricing_mode(request:, usage:)
|
|
115
|
+
service_tier = object_value(usage, :service_tier) || request[:service_tier]
|
|
116
|
+
tier = Providers::Anthropic::TierClassification
|
|
117
|
+
service_tier = nil if tier.standard_equivalent_tier?(service_tier)
|
|
131
118
|
|
|
132
119
|
modes = [
|
|
133
|
-
Pricing.normalize_mode(object_value(usage, :speed) ||
|
|
120
|
+
Pricing.normalize_mode(object_value(usage, :speed) || request[:speed]),
|
|
134
121
|
Pricing.normalize_mode(service_tier)
|
|
135
122
|
]
|
|
136
|
-
geo = inference_geo(
|
|
137
|
-
modes << "data_residency" if
|
|
123
|
+
geo = inference_geo(request: request, usage: usage).to_s.downcase
|
|
124
|
+
modes << "data_residency" if tier.data_residency_geo?(geo)
|
|
138
125
|
modes = modes.compact.uniq
|
|
139
126
|
modes.empty? ? nil : modes.join("_")
|
|
140
127
|
end
|
|
141
128
|
|
|
142
129
|
def stream_pricing_mode(request)
|
|
143
|
-
pricing_mode(
|
|
130
|
+
pricing_mode(request: request || {}, usage: nil)
|
|
144
131
|
end
|
|
145
132
|
|
|
146
|
-
def inference_geo(
|
|
147
|
-
object_value(usage, :inference_geo) ||
|
|
148
|
-
|
|
149
|
-
|
|
133
|
+
def inference_geo(request:, usage:)
|
|
134
|
+
object_value(usage, :inference_geo) || request[:inference_geo]
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
def wrap_stream_call(args, kwargs)
|
|
138
|
+
request = request_params(args, kwargs)
|
|
139
|
+
enforce_budget!(request: request)
|
|
140
|
+
collector = stream_collector(request)
|
|
141
|
+
stream = yield
|
|
142
|
+
track_stream(stream, collector: collector)
|
|
150
143
|
end
|
|
151
144
|
end
|
|
152
145
|
|
|
153
146
|
module MessagesPatch
|
|
154
147
|
def create(*args, **kwargs)
|
|
155
|
-
LlmCostTracker::Integrations::Anthropic.
|
|
148
|
+
request = LlmCostTracker::Integrations::Anthropic.request_params(args, kwargs)
|
|
149
|
+
LlmCostTracker::Integrations::Anthropic.enforce_budget!(request: request)
|
|
156
150
|
started_at = LlmCostTracker::Timing.now_monotonic
|
|
157
151
|
message = super
|
|
158
152
|
LlmCostTracker::Integrations::Anthropic.record_message(
|
|
159
153
|
message,
|
|
160
|
-
request:
|
|
161
|
-
latency_ms: LlmCostTracker::
|
|
154
|
+
request: request,
|
|
155
|
+
latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at)
|
|
162
156
|
)
|
|
163
157
|
message
|
|
164
158
|
end
|
|
165
159
|
|
|
166
160
|
def stream(*args, **kwargs)
|
|
167
|
-
|
|
168
|
-
LlmCostTracker::Integrations::Anthropic.enforce_budget!
|
|
169
|
-
collector = LlmCostTracker::Integrations::Anthropic.stream_collector(request)
|
|
170
|
-
stream = super
|
|
171
|
-
LlmCostTracker::Integrations::Anthropic.track_stream(stream, collector: collector)
|
|
161
|
+
LlmCostTracker::Integrations::Anthropic.wrap_stream_call(args, kwargs) { super }
|
|
172
162
|
end
|
|
173
163
|
|
|
174
164
|
def stream_raw(*args, **kwargs)
|
|
175
|
-
|
|
176
|
-
LlmCostTracker::Integrations::Anthropic.enforce_budget!
|
|
177
|
-
collector = LlmCostTracker::Integrations::Anthropic.stream_collector(request)
|
|
178
|
-
stream = super
|
|
179
|
-
LlmCostTracker::Integrations::Anthropic.track_stream(stream, collector: collector)
|
|
165
|
+
LlmCostTracker::Integrations::Anthropic.wrap_stream_call(args, kwargs) { super }
|
|
180
166
|
end
|
|
181
167
|
end
|
|
182
168
|
end
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
require "active_support/core_ext/hash/indifferent_access"
|
|
4
4
|
require "active_support/core_ext/string/inflections"
|
|
5
5
|
|
|
6
|
+
require_relative "../doctor/check"
|
|
6
7
|
require_relative "../logging"
|
|
7
8
|
require_relative "../timing"
|
|
8
9
|
require_relative "../capture/stream_collector"
|
|
@@ -11,7 +12,7 @@ require_relative "../capture/stream_tracker"
|
|
|
11
12
|
module LlmCostTracker
|
|
12
13
|
module Integrations
|
|
13
14
|
module Base
|
|
14
|
-
Result =
|
|
15
|
+
Result = LlmCostTracker::Doctor::Check
|
|
15
16
|
|
|
16
17
|
def active?
|
|
17
18
|
LlmCostTracker.configuration.instrumented?(integration_name)
|
|
@@ -26,26 +27,28 @@ module LlmCostTracker
|
|
|
26
27
|
end
|
|
27
28
|
|
|
28
29
|
def status
|
|
29
|
-
name = integration_name
|
|
30
|
+
name = integration_name.to_s
|
|
30
31
|
problems = version_problems + target_problems
|
|
31
32
|
if problems.any?
|
|
32
|
-
return Result.new(
|
|
33
|
+
return Result.new(:warn, name, "#{name} integration cannot be installed: #{problems.join('; ')}")
|
|
33
34
|
end
|
|
34
35
|
|
|
35
36
|
installed = patch_targets.reject { |target| target.fetch(:optional) }.all? do |target|
|
|
36
37
|
target.fetch(:constant_name).to_s.safe_constantize&.ancestors&.include?(target.fetch(:patch))
|
|
37
38
|
end
|
|
38
|
-
return Result.new(
|
|
39
|
+
return Result.new(:ok, name, "#{name} integration installed") if installed
|
|
39
40
|
|
|
40
|
-
Result.new(
|
|
41
|
+
Result.new(:warn, name, "#{name} integration is enabled but not installed")
|
|
41
42
|
end
|
|
42
43
|
|
|
43
|
-
def
|
|
44
|
-
|
|
45
|
-
end
|
|
44
|
+
def enforce_budget!(request:)
|
|
45
|
+
return unless active?
|
|
46
46
|
|
|
47
|
-
|
|
48
|
-
|
|
47
|
+
LlmCostTracker::Tracker.enforce_budget!(
|
|
48
|
+
provider: integration_name.to_s,
|
|
49
|
+
model: request[:model],
|
|
50
|
+
request: request
|
|
51
|
+
)
|
|
49
52
|
end
|
|
50
53
|
|
|
51
54
|
def record_safely
|
|
@@ -61,7 +64,7 @@ module LlmCostTracker
|
|
|
61
64
|
case args.first
|
|
62
65
|
when Hash then args.first
|
|
63
66
|
when nil then {}
|
|
64
|
-
else args.first.
|
|
67
|
+
else args.first.to_h
|
|
65
68
|
end
|
|
66
69
|
params.merge(kwargs).with_indifferent_access
|
|
67
70
|
rescue StandardError
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
require_relative "base"
|
|
4
4
|
require_relative "../billing/line_item"
|
|
5
5
|
require_relative "../parsers/openai_service_charges"
|
|
6
|
+
require_relative "../providers/azure/hosts"
|
|
7
|
+
require_relative "../providers/openai/model_families"
|
|
6
8
|
|
|
7
9
|
module LlmCostTracker
|
|
8
10
|
module Integrations
|
|
@@ -24,22 +26,35 @@ module LlmCostTracker
|
|
|
24
26
|
|
|
25
27
|
def stream_collector(request, host: nil)
|
|
26
28
|
LlmCostTracker::Capture::StreamCollector.new(
|
|
27
|
-
provider:
|
|
29
|
+
provider: provider_for_host(host),
|
|
28
30
|
model: request[:model],
|
|
29
31
|
pricing_mode: stream_pricing_mode(request, host: host),
|
|
30
32
|
request: request
|
|
31
33
|
)
|
|
32
34
|
end
|
|
33
35
|
|
|
36
|
+
def wrap_stream_call(args, kwargs, resource)
|
|
37
|
+
request = request_params(args, kwargs)
|
|
38
|
+
enforce_budget!(request: request)
|
|
39
|
+
host = client_host_for(resource)
|
|
40
|
+
collector = stream_collector(request, host: host)
|
|
41
|
+
stream = yield(normalize_sdk_args(args, kwargs), collector)
|
|
42
|
+
track_stream(stream, collector: collector)
|
|
43
|
+
end
|
|
44
|
+
|
|
34
45
|
def client_host_for(resource)
|
|
35
46
|
client = resource.instance_variable_get(:@client)
|
|
36
|
-
return nil unless client
|
|
47
|
+
return nil unless client
|
|
37
48
|
|
|
38
|
-
URI.parse(client.
|
|
49
|
+
URI.parse(client.base_url.to_s).host
|
|
39
50
|
rescue URI::InvalidURIError
|
|
40
51
|
nil
|
|
41
52
|
end
|
|
42
53
|
|
|
54
|
+
def provider_for_host(host)
|
|
55
|
+
LlmCostTracker::Providers::Azure::Hosts.openai?(host) ? "azure_openai" : "openai"
|
|
56
|
+
end
|
|
57
|
+
|
|
43
58
|
def minimum_version
|
|
44
59
|
"0.59.0"
|
|
45
60
|
end
|
|
@@ -97,8 +112,8 @@ module LlmCostTracker
|
|
|
97
112
|
cache_read = cache_read_input_tokens(usage)
|
|
98
113
|
model = object_value(response, :model) || request[:model]
|
|
99
114
|
LlmCostTracker::Tracker.record(
|
|
100
|
-
|
|
101
|
-
provider:
|
|
115
|
+
event: Event.build(
|
|
116
|
+
provider: provider_for_host(host),
|
|
102
117
|
model: model,
|
|
103
118
|
pricing_mode: LlmCostTracker::Parsers::OpenaiUsage.combined_pricing_mode(
|
|
104
119
|
host: host,
|
|
@@ -115,7 +130,7 @@ module LlmCostTracker
|
|
|
115
130
|
end
|
|
116
131
|
end
|
|
117
132
|
|
|
118
|
-
def record_image(response, request:, latency_ms:)
|
|
133
|
+
def record_image(response, request:, latency_ms:, host: nil)
|
|
119
134
|
usage = object_value(response, :usage)
|
|
120
135
|
raw_input = usage ? object_value(usage, :input_tokens).to_i : 0
|
|
121
136
|
raw_output = usage ? object_value(usage, :output_tokens).to_i : 0
|
|
@@ -127,6 +142,7 @@ module LlmCostTracker
|
|
|
127
142
|
model: request[:model],
|
|
128
143
|
response: response,
|
|
129
144
|
latency_ms: latency_ms,
|
|
145
|
+
host: host,
|
|
130
146
|
input_tokens: text_input,
|
|
131
147
|
image_input_tokens: image_input,
|
|
132
148
|
output_tokens: text_output,
|
|
@@ -144,11 +160,12 @@ module LlmCostTracker
|
|
|
144
160
|
[image_tokens, text_tokens]
|
|
145
161
|
end
|
|
146
162
|
|
|
147
|
-
def record_transcription(response, request:, latency_ms:)
|
|
163
|
+
def record_transcription(response, request:, latency_ms:, host: nil)
|
|
148
164
|
record_passthrough(
|
|
149
165
|
model: request[:model],
|
|
150
166
|
response: response,
|
|
151
167
|
latency_ms: latency_ms,
|
|
168
|
+
host: host,
|
|
152
169
|
**transcription_token_attributes(object_value(response, :usage))
|
|
153
170
|
)
|
|
154
171
|
end
|
|
@@ -165,24 +182,22 @@ module LlmCostTracker
|
|
|
165
182
|
}
|
|
166
183
|
end
|
|
167
184
|
|
|
168
|
-
def record_speech(_response, request:, latency_ms:)
|
|
185
|
+
def record_speech(_response, request:, latency_ms:, host: nil)
|
|
169
186
|
record_passthrough(
|
|
170
187
|
model: request[:model],
|
|
171
188
|
response: nil,
|
|
172
189
|
latency_ms: latency_ms,
|
|
190
|
+
host: host,
|
|
173
191
|
input_tokens: 0,
|
|
174
192
|
output_tokens: 0,
|
|
175
193
|
service_line_items: speech_line_items(request)
|
|
176
194
|
)
|
|
177
195
|
end
|
|
178
196
|
|
|
179
|
-
CHARACTER_BILLED_TTS_MODELS = /\Atts-1(-hd)?\z/
|
|
180
|
-
private_constant :CHARACTER_BILLED_TTS_MODELS
|
|
181
|
-
|
|
182
197
|
def speech_line_items(request)
|
|
183
198
|
input = request[:input]
|
|
184
199
|
return [] unless input.is_a?(String)
|
|
185
|
-
return [] unless
|
|
200
|
+
return [] unless LlmCostTracker::Providers::Openai::ModelFamilies.character_billed_tts?(request[:model])
|
|
186
201
|
|
|
187
202
|
[LlmCostTracker::Billing::LineItem.build(
|
|
188
203
|
component_key: :text_to_speech_character,
|
|
@@ -193,23 +208,24 @@ module LlmCostTracker
|
|
|
193
208
|
)]
|
|
194
209
|
end
|
|
195
210
|
|
|
196
|
-
def record_moderation(response, request:, latency_ms:)
|
|
211
|
+
def record_moderation(response, request:, latency_ms:, host: nil)
|
|
197
212
|
record_passthrough(
|
|
198
213
|
model: object_value(response, :model) || request[:model],
|
|
199
214
|
response: response,
|
|
200
215
|
latency_ms: latency_ms,
|
|
216
|
+
host: host,
|
|
201
217
|
input_tokens: 0,
|
|
202
218
|
output_tokens: 0
|
|
203
219
|
)
|
|
204
220
|
end
|
|
205
221
|
|
|
206
|
-
def record_passthrough(model:, response:, latency_ms:, service_line_items: [], **token_attributes)
|
|
222
|
+
def record_passthrough(model:, response:, latency_ms:, host: nil, service_line_items: [], **token_attributes)
|
|
207
223
|
return unless active?
|
|
208
224
|
|
|
209
225
|
record_safely do
|
|
210
226
|
LlmCostTracker::Tracker.record(
|
|
211
|
-
|
|
212
|
-
provider:
|
|
227
|
+
event: Event.build(
|
|
228
|
+
provider: provider_for_host(host),
|
|
213
229
|
model: model,
|
|
214
230
|
token_usage: TokenUsage.build(**token_attributes),
|
|
215
231
|
usage_source: :sdk_response,
|
|
@@ -222,16 +238,41 @@ module LlmCostTracker
|
|
|
222
238
|
end
|
|
223
239
|
|
|
224
240
|
def service_line_items_from(response, request: nil)
|
|
241
|
+
model = object_value(response, :model) || request&.dig(:model)
|
|
225
242
|
output = object_value(response, :output)
|
|
226
|
-
|
|
243
|
+
output_items = output.respond_to?(:each) ? output.map { |item| normalize_output_item(item) }.compact : []
|
|
244
|
+
chat_search = output_items.empty? ? chat_completions_search_item(response, model: model) : nil
|
|
245
|
+
output_items << chat_search if chat_search
|
|
246
|
+
return [] if output_items.empty?
|
|
227
247
|
|
|
228
248
|
LlmCostTracker::Parsers::OpenaiServiceCharges.line_items_from_output(
|
|
229
|
-
|
|
230
|
-
request: request,
|
|
231
|
-
model: object_value(response, :model) || request&.dig(:model)
|
|
249
|
+
output_items, request: request, model: model
|
|
232
250
|
)
|
|
233
251
|
end
|
|
234
252
|
|
|
253
|
+
def chat_completions_search_item(response, model: nil)
|
|
254
|
+
choices = object_value(response, :choices)
|
|
255
|
+
return nil unless choices.respond_to?(:any?)
|
|
256
|
+
|
|
257
|
+
provider_field = if choices.any? { |choice| choice_used_url_citation?(choice) }
|
|
258
|
+
LlmCostTracker::Parsers::OpenaiServiceCharges::CHAT_COMPLETIONS_ANNOTATION_PROVIDER_FIELD
|
|
259
|
+
elsif LlmCostTracker::Providers::Openai::ModelFamilies.chat_completions_search?(model)
|
|
260
|
+
LlmCostTracker::Parsers::OpenaiServiceCharges::CHAT_COMPLETIONS_SEARCH_MODEL_PROVIDER_FIELD
|
|
261
|
+
end
|
|
262
|
+
return nil unless provider_field
|
|
263
|
+
|
|
264
|
+
{ "type" => "web_search_call", "id" => object_value(response, :id),
|
|
265
|
+
"action" => { "type" => "search" }, "provider_field" => provider_field }
|
|
266
|
+
end
|
|
267
|
+
|
|
268
|
+
def choice_used_url_citation?(choice)
|
|
269
|
+
message = object_value(choice, :message)
|
|
270
|
+
annotations = message && object_value(message, :annotations)
|
|
271
|
+
return false unless annotations.respond_to?(:any?)
|
|
272
|
+
|
|
273
|
+
annotations.any? { |annotation| object_value(annotation, :type).to_s == "url_citation" }
|
|
274
|
+
end
|
|
275
|
+
|
|
235
276
|
def normalize_output_item(item)
|
|
236
277
|
return item if item.is_a?(Hash)
|
|
237
278
|
return nil if item.nil?
|
|
@@ -252,9 +293,6 @@ module LlmCostTracker
|
|
|
252
293
|
{ "type" => object_value(action, :type)&.to_s }
|
|
253
294
|
end
|
|
254
295
|
|
|
255
|
-
IMAGE_OUTPUT_MODEL_PATTERN = /\Agpt-image-/i
|
|
256
|
-
private_constant :IMAGE_OUTPUT_MODEL_PATTERN
|
|
257
|
-
|
|
258
296
|
def token_usage(usage:, input_tokens:, output_tokens:, cache_read:, model: nil)
|
|
259
297
|
audio_input = audio_input_tokens(usage)
|
|
260
298
|
audio_output = audio_output_tokens(usage)
|
|
@@ -266,7 +304,7 @@ module LlmCostTracker
|
|
|
266
304
|
image_output_details: image_output_details,
|
|
267
305
|
text_output_details: text_output_details,
|
|
268
306
|
audio_output: audio_output,
|
|
269
|
-
default_to_image:
|
|
307
|
+
default_to_image: LlmCostTracker::Providers::Openai::ModelFamilies.image_output?(model)
|
|
270
308
|
)
|
|
271
309
|
|
|
272
310
|
TokenUsage.build(
|
|
@@ -319,77 +357,64 @@ module LlmCostTracker
|
|
|
319
357
|
|
|
320
358
|
module ResponsesPatch
|
|
321
359
|
def create(*args, **kwargs)
|
|
322
|
-
LlmCostTracker::Integrations::Openai.
|
|
360
|
+
request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
|
|
361
|
+
LlmCostTracker::Integrations::Openai.enforce_budget!(request: request)
|
|
323
362
|
started_at = LlmCostTracker::Timing.now_monotonic
|
|
324
363
|
response = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
|
|
325
364
|
LlmCostTracker::Integrations::Openai.record_response(
|
|
326
365
|
response,
|
|
327
|
-
request:
|
|
328
|
-
latency_ms: LlmCostTracker::
|
|
366
|
+
request: request,
|
|
367
|
+
latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
|
|
329
368
|
host: LlmCostTracker::Integrations::Openai.client_host_for(self)
|
|
330
369
|
)
|
|
331
370
|
response
|
|
332
371
|
end
|
|
333
372
|
|
|
334
373
|
def stream(*args, **kwargs)
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
|
|
339
|
-
stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
|
|
340
|
-
LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
|
|
374
|
+
LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, _|
|
|
375
|
+
super(*normalized)
|
|
376
|
+
end
|
|
341
377
|
end
|
|
342
378
|
|
|
343
379
|
def stream_raw(*args, **kwargs)
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
|
|
348
|
-
stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
|
|
349
|
-
LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
|
|
380
|
+
LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, _|
|
|
381
|
+
super(*normalized)
|
|
382
|
+
end
|
|
350
383
|
end
|
|
351
384
|
|
|
352
385
|
def retrieve_streaming(response_id, *args, **kwargs)
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
collector.provider_response_id = response_id
|
|
358
|
-
stream = super(response_id, *LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
|
|
359
|
-
LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
|
|
386
|
+
LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, collector|
|
|
387
|
+
collector.provider_response_id = response_id
|
|
388
|
+
super(response_id, *normalized)
|
|
389
|
+
end
|
|
360
390
|
end
|
|
361
391
|
end
|
|
362
392
|
|
|
363
393
|
module ChatCompletionsPatch
|
|
364
394
|
def create(*args, **kwargs)
|
|
365
|
-
LlmCostTracker::Integrations::Openai.
|
|
395
|
+
request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
|
|
396
|
+
LlmCostTracker::Integrations::Openai.enforce_budget!(request: request)
|
|
366
397
|
started_at = LlmCostTracker::Timing.now_monotonic
|
|
367
398
|
response = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
|
|
368
399
|
LlmCostTracker::Integrations::Openai.record_response(
|
|
369
400
|
response,
|
|
370
|
-
request:
|
|
371
|
-
latency_ms: LlmCostTracker::
|
|
401
|
+
request: request,
|
|
402
|
+
latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
|
|
372
403
|
host: LlmCostTracker::Integrations::Openai.client_host_for(self)
|
|
373
404
|
)
|
|
374
405
|
response
|
|
375
406
|
end
|
|
376
407
|
|
|
377
408
|
def stream(*args, **kwargs)
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
|
|
382
|
-
stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
|
|
383
|
-
LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
|
|
409
|
+
LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, _|
|
|
410
|
+
super(*normalized)
|
|
411
|
+
end
|
|
384
412
|
end
|
|
385
413
|
|
|
386
414
|
def stream_raw(*args, **kwargs)
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
|
|
391
|
-
stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
|
|
392
|
-
LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
|
|
415
|
+
LlmCostTracker::Integrations::Openai.wrap_stream_call(args, kwargs, self) do |normalized, _|
|
|
416
|
+
super(*normalized)
|
|
417
|
+
end
|
|
393
418
|
end
|
|
394
419
|
end
|
|
395
420
|
|
|
@@ -405,13 +430,15 @@ module LlmCostTracker
|
|
|
405
430
|
def define_wrapped_method(mod, method_name, record_method)
|
|
406
431
|
mod.define_method(method_name) do |*args, **kwargs, &block|
|
|
407
432
|
integration = LlmCostTracker::Integrations::Openai
|
|
408
|
-
integration.
|
|
433
|
+
request = integration.request_params(args, kwargs)
|
|
434
|
+
integration.enforce_budget!(request: request)
|
|
409
435
|
started_at = LlmCostTracker::Timing.now_monotonic
|
|
410
436
|
response = super(*integration.normalize_sdk_args(args, kwargs), &block)
|
|
411
437
|
integration.public_send(
|
|
412
438
|
record_method, response,
|
|
413
|
-
request:
|
|
414
|
-
latency_ms:
|
|
439
|
+
request: request,
|
|
440
|
+
latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
|
|
441
|
+
host: integration.client_host_for(self)
|
|
415
442
|
)
|
|
416
443
|
response
|
|
417
444
|
end
|
|
@@ -429,7 +456,7 @@ module LlmCostTracker
|
|
|
429
456
|
%i[generate_stream_raw edit_stream_raw].each do |method_name|
|
|
430
457
|
define_method(method_name) do |*args, **kwargs|
|
|
431
458
|
request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
|
|
432
|
-
LlmCostTracker::Integrations::Openai.enforce_budget!
|
|
459
|
+
LlmCostTracker::Integrations::Openai.enforce_budget!(request: request)
|
|
433
460
|
host = LlmCostTracker::Integrations::Openai.client_host_for(self)
|
|
434
461
|
collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
|
|
435
462
|
stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
|
|
@@ -441,7 +468,7 @@ module LlmCostTracker
|
|
|
441
468
|
module StreamingTranscriptionsPatch
|
|
442
469
|
def create_streaming(*args, **kwargs)
|
|
443
470
|
request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
|
|
444
|
-
LlmCostTracker::Integrations::Openai.enforce_budget!
|
|
471
|
+
LlmCostTracker::Integrations::Openai.enforce_budget!(request: request)
|
|
445
472
|
host = LlmCostTracker::Integrations::Openai.client_host_for(self)
|
|
446
473
|
collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
|
|
447
474
|
stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
|