llm_cost_tracker 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +108 -0
- data/README.md +12 -5
- data/app/assets/llm_cost_tracker/application.css +65 -5
- data/app/controllers/llm_cost_tracker/application_controller.rb +25 -33
- data/app/controllers/llm_cost_tracker/assets_controller.rb +1 -1
- data/app/controllers/llm_cost_tracker/calls_controller.rb +5 -7
- data/app/controllers/llm_cost_tracker/data_quality_controller.rb +4 -0
- data/app/controllers/llm_cost_tracker/reconciliation_controller.rb +106 -0
- data/app/controllers/llm_cost_tracker/tags_controller.rb +15 -1
- data/app/helpers/llm_cost_tracker/application_helper.rb +10 -0
- data/app/helpers/llm_cost_tracker/inline_style_helper.rb +28 -0
- data/app/helpers/llm_cost_tracker/reconciliation_helper.rb +13 -0
- data/app/helpers/llm_cost_tracker/token_usage_helper.rb +5 -1
- data/app/models/llm_cost_tracker/call.rb +0 -3
- data/app/models/llm_cost_tracker/call_line_item.rb +1 -5
- data/app/models/llm_cost_tracker/call_rollup.rb +0 -3
- data/app/models/llm_cost_tracker/call_tag.rb +0 -4
- data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +0 -4
- data/app/models/llm_cost_tracker/ingestion/lease.rb +0 -3
- data/app/models/llm_cost_tracker/provider_invoice.rb +7 -3
- data/app/models/llm_cost_tracker/provider_invoice_import.rb +24 -0
- data/app/services/llm_cost_tracker/dashboard/data_quality.rb +33 -4
- data/app/services/llm_cost_tracker/dashboard/filter.rb +6 -4
- data/app/views/layouts/llm_cost_tracker/application.html.erb +6 -1
- data/app/views/llm_cost_tracker/calls/show.html.erb +25 -40
- data/app/views/llm_cost_tracker/dashboard/index.html.erb +9 -9
- data/app/views/llm_cost_tracker/data_quality/index.html.erb +91 -52
- data/app/views/llm_cost_tracker/reconciliation/index.html.erb +183 -0
- data/app/views/llm_cost_tracker/shared/_bar.html.erb +1 -1
- data/app/views/llm_cost_tracker/shared/_filters.html.erb +3 -0
- data/app/views/llm_cost_tracker/shared/_metric_stack.html.erb +1 -1
- data/app/views/llm_cost_tracker/tags/show.html.erb +60 -0
- data/config/routes.rb +3 -2
- data/lib/llm_cost_tracker/billing/components.rb +45 -3
- data/lib/llm_cost_tracker/billing/components.yml +71 -0
- data/lib/llm_cost_tracker/billing/line_item.rb +1 -1
- data/lib/llm_cost_tracker/budget.rb +4 -2
- data/lib/llm_cost_tracker/capture/stream_collector.rb +93 -20
- data/lib/llm_cost_tracker/capture/stream_tracker.rb +40 -5
- data/lib/llm_cost_tracker/configuration.rb +53 -1
- data/lib/llm_cost_tracker/dashboard_setup_state.rb +109 -0
- data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +2 -0
- data/lib/llm_cost_tracker/doctor/ingestion_check.rb +26 -0
- data/lib/llm_cost_tracker/doctor/invoice_reconciliation_check.rb +164 -0
- data/lib/llm_cost_tracker/doctor/schema_check.rb +5 -2
- data/lib/llm_cost_tracker/doctor.rb +72 -3
- data/lib/llm_cost_tracker/engine.rb +9 -0
- data/lib/llm_cost_tracker/event.rb +1 -1
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/call_rollups_generator.rb +43 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/durable_ingestion_generator.rb +43 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +13 -3
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/reconciliation_generator.rb +34 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_call_rollups.rb.erb +15 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +5 -58
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_durable_ingestion.rb.erb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_reconciliation.rb.erb +55 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +28 -25
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_rollups_provider.rb.erb +20 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_tags_key_value_index.rb.erb +32 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_image_tokens.rb.erb +18 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_rollups_provider_generator.rb +38 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_tags_key_value_index_generator.rb +30 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_image_tokens_generator.rb +29 -0
- data/lib/llm_cost_tracker/ingestion/inbox.rb +0 -1
- data/lib/llm_cost_tracker/ingestion/inline.rb +22 -0
- data/lib/llm_cost_tracker/ingestion/worker.rb +10 -2
- data/lib/llm_cost_tracker/ingestion.rb +48 -10
- data/lib/llm_cost_tracker/integrations/anthropic.rb +24 -5
- data/lib/llm_cost_tracker/integrations/base.rb +22 -5
- data/lib/llm_cost_tracker/integrations/openai.rb +300 -66
- data/lib/llm_cost_tracker/integrations/ruby_llm.rb +105 -6
- data/lib/llm_cost_tracker/integrations.rb +19 -1
- data/lib/llm_cost_tracker/ledger/period/totals.rb +21 -5
- data/lib/llm_cost_tracker/ledger/rollups.rb +24 -10
- data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +30 -1
- data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +3 -3
- data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +17 -2
- data/lib/llm_cost_tracker/ledger/schema/calls.rb +2 -0
- data/lib/llm_cost_tracker/ledger/schema/ingestion_inbox_entries.rb +47 -0
- data/lib/llm_cost_tracker/ledger/schema/ingestion_leases.rb +42 -0
- data/lib/llm_cost_tracker/ledger/schema/provider_invoice_imports.rb +46 -0
- data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +2 -2
- data/lib/llm_cost_tracker/ledger/store.rb +14 -14
- data/lib/llm_cost_tracker/ledger/tags/encoding.rb +37 -0
- data/lib/llm_cost_tracker/ledger/tags/query.rb +2 -1
- data/lib/llm_cost_tracker/ledger.rb +2 -1
- data/lib/llm_cost_tracker/masking.rb +39 -0
- data/lib/llm_cost_tracker/middleware/faraday.rb +88 -29
- data/lib/llm_cost_tracker/parsers/anthropic.rb +22 -7
- data/lib/llm_cost_tracker/parsers/base.rb +5 -1
- data/lib/llm_cost_tracker/parsers/gemini.rb +4 -0
- data/lib/llm_cost_tracker/parsers/openai.rb +16 -2
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +5 -1
- data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +49 -10
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +124 -53
- data/lib/llm_cost_tracker/prices.json +110 -19
- data/lib/llm_cost_tracker/pricing/effective_prices.rb +5 -36
- data/lib/llm_cost_tracker/pricing/lookup.rb +36 -3
- data/lib/llm_cost_tracker/pricing/mode.rb +76 -0
- data/lib/llm_cost_tracker/pricing/registry.rb +3 -1
- data/lib/llm_cost_tracker/pricing/service_charges.rb +9 -3
- data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +50 -1
- data/lib/llm_cost_tracker/pricing/sync.rb +3 -1
- data/lib/llm_cost_tracker/pricing.rb +47 -19
- data/lib/llm_cost_tracker/railtie.rb +6 -0
- data/lib/llm_cost_tracker/reconcile_tasks.rb +134 -0
- data/lib/llm_cost_tracker/reconciliation/diff.rb +428 -0
- data/lib/llm_cost_tracker/reconciliation/diff_result.rb +48 -0
- data/lib/llm_cost_tracker/reconciliation/import_result.rb +19 -0
- data/lib/llm_cost_tracker/reconciliation/importer.rb +253 -0
- data/lib/llm_cost_tracker/reconciliation/sources/anthropic_usage.rb +171 -0
- data/lib/llm_cost_tracker/reconciliation/sources/fingerprint.rb +20 -0
- data/lib/llm_cost_tracker/reconciliation/sources/openai_usage.rb +142 -0
- data/lib/llm_cost_tracker/reconciliation.rb +118 -0
- data/lib/llm_cost_tracker/report/data.rb +4 -1
- data/lib/llm_cost_tracker/retention.rb +15 -2
- data/lib/llm_cost_tracker/tags/context.rb +3 -4
- data/lib/llm_cost_tracker/tags/sanitizer.rb +60 -4
- data/lib/llm_cost_tracker/token_usage.rb +10 -2
- data/lib/llm_cost_tracker/tracker.rb +45 -18
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +9 -0
- data/lib/tasks/llm_cost_tracker.rake +25 -2
- metadata +36 -1
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require "faraday"
|
|
4
4
|
require "json"
|
|
5
|
+
require "stringio"
|
|
5
6
|
require "uri"
|
|
6
7
|
|
|
7
8
|
require_relative "../logging"
|
|
@@ -23,28 +24,82 @@ module LlmCostTracker
|
|
|
23
24
|
request_body = read_body(request_env.body)
|
|
24
25
|
parser = Parsers.find_for(request_url)
|
|
25
26
|
streaming = parser&.streaming_request?(request_url, request_body)
|
|
27
|
+
request_body = inject_stream_usage_flag(request_env, parser, request_url) if streaming
|
|
26
28
|
stream_buffer = install_stream_tap(request_env) if streaming
|
|
27
29
|
|
|
28
30
|
Tracker.enforce_budget! if parser
|
|
29
31
|
context_tags, metadata = tag_snapshot(request_env) if parser
|
|
30
32
|
started_at = LlmCostTracker::Timing.now_monotonic
|
|
31
33
|
|
|
34
|
+
invoke_app_with_capture(
|
|
35
|
+
request_env: request_env, parser: parser, request_url: request_url,
|
|
36
|
+
request_body: request_body, streaming: streaming, stream_buffer: stream_buffer,
|
|
37
|
+
context_tags: context_tags, metadata: metadata, started_at: started_at
|
|
38
|
+
)
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
def invoke_app_with_capture(request_env:, parser:, request_url:, request_body:, streaming:,
|
|
44
|
+
stream_buffer:, context_tags:, metadata:, started_at:)
|
|
45
|
+
response_received = false
|
|
32
46
|
@app.call(request_env).on_complete do |response_env|
|
|
47
|
+
response_received = true
|
|
33
48
|
process(
|
|
34
|
-
parser: parser,
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
49
|
+
parser: parser, request_url: request_url, request_body: request_body,
|
|
50
|
+
response_env: response_env, latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
|
|
51
|
+
streaming: streaming, stream_buffer: stream_buffer,
|
|
52
|
+
context_tags: context_tags, metadata: metadata
|
|
53
|
+
)
|
|
54
|
+
end
|
|
55
|
+
rescue StandardError => e
|
|
56
|
+
if streaming && parser && !response_received
|
|
57
|
+
process_interrupted_stream(
|
|
58
|
+
parser: parser, request_url: request_url, request_body: request_body,
|
|
38
59
|
latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
|
|
39
|
-
|
|
40
|
-
stream_buffer: stream_buffer,
|
|
41
|
-
context_tags: context_tags,
|
|
42
|
-
metadata: metadata
|
|
60
|
+
context_tags: context_tags, metadata: metadata, error: e
|
|
43
61
|
)
|
|
44
62
|
end
|
|
63
|
+
raise
|
|
45
64
|
end
|
|
46
65
|
|
|
47
|
-
|
|
66
|
+
def inject_stream_usage_flag(request_env, parser, request_url)
|
|
67
|
+
body_string = read_body(request_env.body)
|
|
68
|
+
return body_string unless LlmCostTracker.configuration.auto_enable_stream_usage
|
|
69
|
+
return body_string unless parser&.auto_enable_stream_usage?(request_url)
|
|
70
|
+
|
|
71
|
+
body = JSON.parse(body_string)
|
|
72
|
+
return body_string if body["stream_options"].is_a?(Hash) && body["stream_options"].key?("include_usage")
|
|
73
|
+
|
|
74
|
+
body["stream_options"] = (body["stream_options"] || {}).merge("include_usage" => true)
|
|
75
|
+
new_body = body.to_json
|
|
76
|
+
request_env.body = new_body
|
|
77
|
+
new_body
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def process_interrupted_stream(parser:, request_url:, request_body:, latency_ms:,
|
|
81
|
+
context_tags:, metadata:, error:)
|
|
82
|
+
request = parser.safe_json_parse(request_body)
|
|
83
|
+
capture = UsageCapture.build(
|
|
84
|
+
provider: parser.provider_for(request_url),
|
|
85
|
+
model: request["model"] || UsageCapture::UNKNOWN_MODEL,
|
|
86
|
+
token_usage: TokenUsage.build(input_tokens: 0, output_tokens: 0, total_tokens: 0),
|
|
87
|
+
stream: true,
|
|
88
|
+
usage_source: :unknown
|
|
89
|
+
)
|
|
90
|
+
merged_metadata = (metadata || {}).merge(
|
|
91
|
+
stream_interrupted: true,
|
|
92
|
+
stream_interrupted_error: "#{error.class}: #{error.message}"
|
|
93
|
+
)
|
|
94
|
+
Tracker.record(
|
|
95
|
+
capture: capture,
|
|
96
|
+
latency_ms: latency_ms,
|
|
97
|
+
metadata: merged_metadata,
|
|
98
|
+
context_tags: context_tags
|
|
99
|
+
)
|
|
100
|
+
rescue StandardError => e
|
|
101
|
+
Logging.warn("Error recording interrupted stream: #{e.class}: #{e.message}")
|
|
102
|
+
end
|
|
48
103
|
|
|
49
104
|
def process(parser:, request_url:, request_body:, response_env:,
|
|
50
105
|
latency_ms:, streaming:, stream_buffer:, context_tags:, metadata:)
|
|
@@ -102,21 +157,14 @@ module LlmCostTracker
|
|
|
102
157
|
end
|
|
103
158
|
|
|
104
159
|
def parse_stream(parser:, request_url:, request_body:, response_env:, stream_buffer:)
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
return parser.parse_stream(
|
|
108
|
-
request_url: request_url,
|
|
109
|
-
request_body: request_body,
|
|
110
|
-
response_status: response_env.status,
|
|
111
|
-
response_headers: response_env.response_headers
|
|
112
|
-
)
|
|
113
|
-
end
|
|
160
|
+
overflowed = stream_buffer&.dig(:overflowed) == true
|
|
161
|
+
Logging.warn(capture_warning(request_url, stream_buffer)) if overflowed
|
|
114
162
|
|
|
115
163
|
body = stream_buffer&.dig(:buffer)&.string
|
|
116
164
|
body = read_body(response_env.body) if body.blank?
|
|
117
165
|
|
|
118
166
|
if body.blank?
|
|
119
|
-
Logging.warn(capture_warning(request_url, stream_buffer))
|
|
167
|
+
Logging.warn(capture_warning(request_url, stream_buffer)) unless overflowed
|
|
120
168
|
return parser.parse_stream(
|
|
121
169
|
request_url: request_url,
|
|
122
170
|
request_body: request_body,
|
|
@@ -125,7 +173,7 @@ module LlmCostTracker
|
|
|
125
173
|
)
|
|
126
174
|
end
|
|
127
175
|
|
|
128
|
-
events = Parsers::SSE.parse(body)
|
|
176
|
+
events = overflowed ? [] : Parsers::SSE.parse(body)
|
|
129
177
|
parser.parse_stream(
|
|
130
178
|
request_url: request_url,
|
|
131
179
|
request_body: request_body,
|
|
@@ -135,6 +183,17 @@ module LlmCostTracker
|
|
|
135
183
|
)
|
|
136
184
|
end
|
|
137
185
|
|
|
186
|
+
def forward_on_data_chunk(callable, chunk, size, env)
|
|
187
|
+
arity = callable.arity
|
|
188
|
+
return callable.call(chunk, size, env) if arity.negative?
|
|
189
|
+
|
|
190
|
+
case arity
|
|
191
|
+
when 0, 1 then callable.call(chunk)
|
|
192
|
+
when 2 then callable.call(chunk, size)
|
|
193
|
+
else callable.call(chunk, size, env)
|
|
194
|
+
end
|
|
195
|
+
end
|
|
196
|
+
|
|
138
197
|
def install_stream_tap(request_env)
|
|
139
198
|
request = request_env.request
|
|
140
199
|
return nil unless request
|
|
@@ -145,16 +204,16 @@ module LlmCostTracker
|
|
|
145
204
|
state = { buffer: StringIO.new, bytes: 0, overflowed: false }
|
|
146
205
|
request.on_data = proc do |chunk, size, env|
|
|
147
206
|
chunk = chunk.to_s
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
207
|
+
remaining = Capture::Stream::LIMIT_BYTES - state[:bytes]
|
|
208
|
+
if chunk.bytesize <= remaining
|
|
209
|
+
state[:buffer] << chunk
|
|
210
|
+
state[:bytes] += chunk.bytesize
|
|
211
|
+
else
|
|
212
|
+
state[:buffer] << chunk.byteslice(0, remaining) if remaining.positive?
|
|
213
|
+
state[:bytes] += [remaining, 0].max
|
|
214
|
+
state[:overflowed] = true
|
|
156
215
|
end
|
|
157
|
-
original
|
|
216
|
+
forward_on_data_chunk(original, chunk, size, env)
|
|
158
217
|
end
|
|
159
218
|
state
|
|
160
219
|
rescue StandardError => e
|
|
@@ -61,20 +61,28 @@ module LlmCostTracker
|
|
|
61
61
|
end
|
|
62
62
|
end
|
|
63
63
|
|
|
64
|
+
def provider_for(_request_url)
|
|
65
|
+
"anthropic"
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
DATA_RESIDENCY_GEOS = %w[us].freeze
|
|
69
|
+
STANDARD_EQUIVALENT_SERVICE_TIERS = %w[standard standard_only priority].freeze
|
|
70
|
+
private_constant :DATA_RESIDENCY_GEOS, :STANDARD_EQUIVALENT_SERVICE_TIERS
|
|
71
|
+
|
|
64
72
|
private
|
|
65
73
|
|
|
66
74
|
def stream_usage(events)
|
|
67
|
-
start_usage = find_event_value(events, reverse: true) do |data|
|
|
68
|
-
data.dig("message", "usage") if data["type"] == "message_start"
|
|
69
|
-
end
|
|
70
75
|
latest_delta = find_event_value(events, reverse: true) do |data|
|
|
71
76
|
data["usage"] if data["type"] == "message_delta" && data["usage"].is_a?(Hash)
|
|
72
77
|
end
|
|
78
|
+
return nil unless latest_delta
|
|
73
79
|
|
|
74
|
-
|
|
80
|
+
start_usage = find_event_value(events, reverse: true) do |data|
|
|
81
|
+
data.dig("message", "usage") if data["type"] == "message_start"
|
|
82
|
+
end
|
|
75
83
|
|
|
76
|
-
(start_usage || {}).merge(latest_delta
|
|
77
|
-
delta_val
|
|
84
|
+
(start_usage || {}).merge(latest_delta) do |_key, start_val, delta_val|
|
|
85
|
+
delta_val || start_val
|
|
78
86
|
end
|
|
79
87
|
end
|
|
80
88
|
|
|
@@ -103,6 +111,11 @@ module LlmCostTracker
|
|
|
103
111
|
quantity: server_tool_use["web_search_requests"],
|
|
104
112
|
provider_field: "usage.server_tool_use.web_search_requests"
|
|
105
113
|
),
|
|
114
|
+
service_line_item(
|
|
115
|
+
component_key: :web_fetch_request,
|
|
116
|
+
quantity: server_tool_use["web_fetch_requests"],
|
|
117
|
+
provider_field: "usage.server_tool_use.web_fetch_requests"
|
|
118
|
+
),
|
|
106
119
|
service_line_item(
|
|
107
120
|
component_key: :code_execution_request,
|
|
108
121
|
quantity: server_tool_use["code_execution_requests"],
|
|
@@ -164,10 +177,12 @@ module LlmCostTracker
|
|
|
164
177
|
service_tier = usage&.fetch("service_tier", nil) ||
|
|
165
178
|
response&.fetch("service_tier", nil) ||
|
|
166
179
|
request["service_tier"]
|
|
180
|
+
service_tier = nil if STANDARD_EQUIVALENT_SERVICE_TIERS.include?(service_tier.to_s)
|
|
167
181
|
|
|
168
182
|
modes << Pricing.normalize_mode(speed)
|
|
169
183
|
modes << Pricing.normalize_mode(service_tier)
|
|
170
|
-
|
|
184
|
+
geo = inference_geo(request: request, response: response, usage: usage).downcase
|
|
185
|
+
modes << "data_residency" if DATA_RESIDENCY_GEOS.include?(geo)
|
|
171
186
|
|
|
172
187
|
modes = modes.compact.uniq
|
|
173
188
|
modes.empty? ? nil : modes.join("_")
|
|
@@ -32,7 +32,9 @@ module LlmCostTracker
|
|
|
32
32
|
nil
|
|
33
33
|
end
|
|
34
34
|
|
|
35
|
-
|
|
35
|
+
def auto_enable_stream_usage?(_request_url)
|
|
36
|
+
false
|
|
37
|
+
end
|
|
36
38
|
|
|
37
39
|
def safe_json_parse(body)
|
|
38
40
|
return {} if body.blank?
|
|
@@ -42,6 +44,8 @@ module LlmCostTracker
|
|
|
42
44
|
{}
|
|
43
45
|
end
|
|
44
46
|
|
|
47
|
+
private
|
|
48
|
+
|
|
45
49
|
def uri_matches?(url)
|
|
46
50
|
uri = parsed_uri(url)
|
|
47
51
|
uri ? yield(uri) : false
|
|
@@ -21,7 +21,19 @@ module LlmCostTracker
|
|
|
21
21
|
gb.api.openai.com
|
|
22
22
|
ae.api.openai.com
|
|
23
23
|
].freeze
|
|
24
|
-
TRACKED_PATHS = %w[
|
|
24
|
+
TRACKED_PATHS = %w[
|
|
25
|
+
/v1/chat/completions
|
|
26
|
+
/v1/completions
|
|
27
|
+
/v1/embeddings
|
|
28
|
+
/v1/responses
|
|
29
|
+
/v1/images/generations
|
|
30
|
+
/v1/images/edits
|
|
31
|
+
/v1/images/variations
|
|
32
|
+
/v1/audio/transcriptions
|
|
33
|
+
/v1/audio/translations
|
|
34
|
+
/v1/audio/speech
|
|
35
|
+
/v1/moderations
|
|
36
|
+
].freeze
|
|
25
37
|
|
|
26
38
|
def match?(url)
|
|
27
39
|
match_uri?(url, hosts: HOSTS, exact_paths: TRACKED_PATHS)
|
|
@@ -49,7 +61,9 @@ module LlmCostTracker
|
|
|
49
61
|
)
|
|
50
62
|
end
|
|
51
63
|
|
|
52
|
-
|
|
64
|
+
def auto_enable_stream_usage?(request_url)
|
|
65
|
+
openai_chat_completions_url?(request_url)
|
|
66
|
+
end
|
|
53
67
|
|
|
54
68
|
def provider_for(_request_url)
|
|
55
69
|
"openai"
|
|
@@ -47,13 +47,17 @@ module LlmCostTracker
|
|
|
47
47
|
)
|
|
48
48
|
end
|
|
49
49
|
|
|
50
|
-
|
|
50
|
+
def auto_enable_stream_usage?(request_url)
|
|
51
|
+
openai_chat_completions_url?(request_url)
|
|
52
|
+
end
|
|
51
53
|
|
|
52
54
|
def provider_for(request_url)
|
|
53
55
|
uri = parsed_uri(request_url)
|
|
54
56
|
provider_for_uri(uri) || "openai_compatible"
|
|
55
57
|
end
|
|
56
58
|
|
|
59
|
+
private
|
|
60
|
+
|
|
57
61
|
def provider_for_uri(uri)
|
|
58
62
|
return nil unless uri
|
|
59
63
|
|
|
@@ -8,15 +8,25 @@ module LlmCostTracker
|
|
|
8
8
|
RESPONSE_OUTPUT_COMPONENTS = {
|
|
9
9
|
"web_search_call" => :web_search_request,
|
|
10
10
|
"file_search_call" => :file_search_call,
|
|
11
|
-
"code_interpreter_call" => :container_session
|
|
11
|
+
"code_interpreter_call" => :container_session,
|
|
12
|
+
"mcp_call" => :mcp_call
|
|
12
13
|
}.freeze
|
|
13
14
|
|
|
15
|
+
REASONING_MODEL_PATTERNS = [
|
|
16
|
+
/\Agpt-5(\b|[\d.-])/i,
|
|
17
|
+
/\Ao\d+(\b|[\d.-])/i
|
|
18
|
+
].freeze
|
|
19
|
+
NON_REASONING_GPT5_PATTERN = /\Agpt-5(?:\.\d+)?-chat\b/i
|
|
20
|
+
private_constant :NON_REASONING_GPT5_PATTERN
|
|
21
|
+
|
|
14
22
|
module_function
|
|
15
23
|
|
|
16
|
-
def line_items_from_output(output_items)
|
|
24
|
+
def line_items_from_output(output_items, request: nil, model: nil)
|
|
17
25
|
deduped = {}
|
|
18
26
|
Array(output_items).each { |item| store_output_item(deduped, item) }
|
|
19
|
-
deduped.values
|
|
27
|
+
deduped.values
|
|
28
|
+
.select { |item| billable?(item) }
|
|
29
|
+
.filter_map { |item| build_line_item(item, request: request, model: model) }
|
|
20
30
|
end
|
|
21
31
|
|
|
22
32
|
def billable?(item)
|
|
@@ -31,7 +41,7 @@ module LlmCostTracker
|
|
|
31
41
|
end
|
|
32
42
|
|
|
33
43
|
def store_output_item(output_items, item)
|
|
34
|
-
return unless
|
|
44
|
+
return unless item.is_a?(Hash) && RESPONSE_OUTPUT_COMPONENTS.key?(item["type"])
|
|
35
45
|
|
|
36
46
|
component = RESPONSE_OUTPUT_COMPONENTS[item["type"]]
|
|
37
47
|
key = if component == :container_session && item["container_id"]
|
|
@@ -42,8 +52,10 @@ module LlmCostTracker
|
|
|
42
52
|
output_items[key] = item
|
|
43
53
|
end
|
|
44
54
|
|
|
45
|
-
def build_line_item(item)
|
|
46
|
-
|
|
55
|
+
def build_line_item(item, request: nil, model: nil)
|
|
56
|
+
return nil unless item.is_a?(Hash)
|
|
57
|
+
|
|
58
|
+
component_key = component_key_for(item, request: request, model: model)
|
|
47
59
|
return nil unless component_key
|
|
48
60
|
|
|
49
61
|
provider_item_id = if component_key == :container_session
|
|
@@ -62,6 +74,33 @@ module LlmCostTracker
|
|
|
62
74
|
)
|
|
63
75
|
end
|
|
64
76
|
|
|
77
|
+
def component_key_for(item, request:, model:)
|
|
78
|
+
component = RESPONSE_OUTPUT_COMPONENTS[item["type"]]
|
|
79
|
+
return component unless component == :web_search_request
|
|
80
|
+
return component unless web_search_preview_used?(request)
|
|
81
|
+
|
|
82
|
+
reasoning_model?(model) ? :web_search_preview_request_reasoning : :web_search_preview_request_non_reasoning
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def web_search_preview_used?(request)
|
|
86
|
+
tools = request && (request[:tools] || request["tools"])
|
|
87
|
+
return false unless tools.respond_to?(:each)
|
|
88
|
+
|
|
89
|
+
tools.any? do |tool|
|
|
90
|
+
type = tool.is_a?(Hash) ? (tool[:type] || tool["type"]) : tool
|
|
91
|
+
type.to_s.include?("web_search_preview")
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def reasoning_model?(model)
|
|
96
|
+
return false unless model
|
|
97
|
+
|
|
98
|
+
name = model.to_s.split("/", 2).last
|
|
99
|
+
return false if NON_REASONING_GPT5_PATTERN.match?(name)
|
|
100
|
+
|
|
101
|
+
REASONING_MODEL_PATTERNS.any? { |pattern| pattern.match?(name) }
|
|
102
|
+
end
|
|
103
|
+
|
|
65
104
|
def line_item_details(item)
|
|
66
105
|
{
|
|
67
106
|
"status" => item["status"],
|
|
@@ -70,17 +109,17 @@ module LlmCostTracker
|
|
|
70
109
|
}.compact
|
|
71
110
|
end
|
|
72
111
|
|
|
73
|
-
def openai_service_line_items(response)
|
|
74
|
-
line_items_from_output(response["output"])
|
|
112
|
+
def openai_service_line_items(response, request: nil)
|
|
113
|
+
line_items_from_output(response["output"], request: request, model: response["model"])
|
|
75
114
|
end
|
|
76
115
|
|
|
77
|
-
def openai_stream_service_line_items(events)
|
|
116
|
+
def openai_stream_service_line_items(events, request: nil, model: nil)
|
|
78
117
|
output_items = []
|
|
79
118
|
each_event_data(events) do |data|
|
|
80
119
|
output_items.concat(Array(data.dig("response", "output")))
|
|
81
120
|
output_items << data["item"] if data["item"]
|
|
82
121
|
end
|
|
83
|
-
line_items_from_output(output_items)
|
|
122
|
+
line_items_from_output(output_items, request: request, model: model)
|
|
84
123
|
end
|
|
85
124
|
end
|
|
86
125
|
end
|
|
@@ -9,6 +9,25 @@ module LlmCostTracker
|
|
|
9
9
|
|
|
10
10
|
OPENAI_DATA_RESIDENCY_HOST_PATTERN = /\A[a-z]{2,3}\.api\.openai\.com\z/
|
|
11
11
|
|
|
12
|
+
class << self
|
|
13
|
+
def combined_pricing_mode(host:, model:, service_tier:)
|
|
14
|
+
modes = [Pricing.normalize_mode(service_tier)]
|
|
15
|
+
modes << "data_residency" if regional_processing?(host: host, model: model)
|
|
16
|
+
modes = modes.compact.uniq
|
|
17
|
+
modes.empty? ? nil : modes.join("_")
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def regional_processing?(host:, model:)
|
|
21
|
+
host.to_s.downcase.match?(OPENAI_DATA_RESIDENCY_HOST_PATTERN) && data_residency_model?(model)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def data_residency_model?(model)
|
|
25
|
+
model.to_s.match?(
|
|
26
|
+
/\Agpt-5\.(?:4|5)(?:-(?:mini|nano|pro|codex(?:-mini|-max)?))?(?:-\d{4}-\d{2}-\d{2})?\z/
|
|
27
|
+
)
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
12
31
|
private
|
|
13
32
|
|
|
14
33
|
def parse_openai_usage(request_url:, request_body:, response_status:, response_body:)
|
|
@@ -32,9 +51,9 @@ module LlmCostTracker
|
|
|
32
51
|
service_tier: response["service_tier"] || request["service_tier"]
|
|
33
52
|
),
|
|
34
53
|
model: model,
|
|
35
|
-
token_usage: token_usage(usage: usage, cache_read: cache_read),
|
|
54
|
+
token_usage: token_usage(usage: usage, cache_read: cache_read, model: model),
|
|
36
55
|
usage_source: :response,
|
|
37
|
-
service_line_items: openai_service_line_items(response)
|
|
56
|
+
service_line_items: openai_service_line_items(response, request: request)
|
|
38
57
|
)
|
|
39
58
|
end
|
|
40
59
|
|
|
@@ -42,91 +61,128 @@ module LlmCostTracker
|
|
|
42
61
|
return nil unless response_status == 200
|
|
43
62
|
|
|
44
63
|
request = safe_json_parse(request_body)
|
|
45
|
-
model = find_event_value(events) { |data| data["model"] || data.dig("response", "model") } || request["model"]
|
|
46
64
|
usage = detect_stream_usage(events)
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
65
|
+
context = stream_capture_context(events: events, request: request, request_url: request_url)
|
|
66
|
+
|
|
67
|
+
return build_known_stream_usage(usage: usage, **context) if usage
|
|
68
|
+
|
|
69
|
+
warn_missing_stream_usage(request_url: request_url, request: request)
|
|
70
|
+
build_unknown_stream_usage(**context)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def stream_capture_context(events:, request:, request_url:)
|
|
74
|
+
model = find_event_value(events) do |data|
|
|
75
|
+
data["model"] || data.dig("response", "model") || data.dig("chunk", "model")
|
|
76
|
+
end || request["model"]
|
|
77
|
+
{
|
|
78
|
+
provider: provider_for(request_url),
|
|
50
79
|
model: model,
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
cache_read = cache_read_input_tokens(usage)
|
|
57
|
-
UsageCapture.build(
|
|
58
|
-
provider: provider_for(request_url),
|
|
59
|
-
provider_response_id: response_id,
|
|
60
|
-
pricing_mode: pricing_mode,
|
|
61
|
-
model: model,
|
|
62
|
-
token_usage: token_usage(usage: usage, cache_read: cache_read),
|
|
63
|
-
stream: true,
|
|
64
|
-
usage_source: :stream_final,
|
|
65
|
-
service_line_items: service_line_items
|
|
66
|
-
)
|
|
67
|
-
else
|
|
68
|
-
build_unknown_stream_usage(
|
|
69
|
-
provider: provider_for(request_url),
|
|
80
|
+
provider_response_id: find_event_value(events) do |data|
|
|
81
|
+
data["id"] || data.dig("response", "id") || data.dig("chunk", "id")
|
|
82
|
+
end,
|
|
83
|
+
pricing_mode: pricing_mode(
|
|
84
|
+
request_url: request_url,
|
|
70
85
|
model: model,
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
86
|
+
service_tier: stream_pricing_mode(events) || request["service_tier"]
|
|
87
|
+
),
|
|
88
|
+
service_line_items: openai_stream_service_line_items(events, request: request, model: model)
|
|
89
|
+
}
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
def build_known_stream_usage(usage:, provider:, model:, provider_response_id:, pricing_mode:, service_line_items:)
|
|
93
|
+
cache_read = cache_read_input_tokens(usage)
|
|
94
|
+
UsageCapture.build(
|
|
95
|
+
provider: provider,
|
|
96
|
+
provider_response_id: provider_response_id,
|
|
97
|
+
pricing_mode: pricing_mode,
|
|
98
|
+
model: model,
|
|
99
|
+
token_usage: token_usage(usage: usage, cache_read: cache_read, model: model),
|
|
100
|
+
stream: true,
|
|
101
|
+
usage_source: :stream_final,
|
|
102
|
+
service_line_items: service_line_items
|
|
103
|
+
)
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def warn_missing_stream_usage(request_url:, request:)
|
|
107
|
+
return unless request.is_a?(Hash) && request["stream"]
|
|
108
|
+
return unless openai_chat_completions_url?(request_url)
|
|
109
|
+
return if request.dig("stream_options", "include_usage")
|
|
110
|
+
|
|
111
|
+
Logging.warn(
|
|
112
|
+
"OpenAI-compatible chat-completions stream finished without a final usage chunk. " \
|
|
113
|
+
"Set `stream_options: { include_usage: true }` in your request body so the gem can " \
|
|
114
|
+
"record token counts. This call was stored with usage_source=unknown."
|
|
115
|
+
)
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def openai_chat_completions_url?(request_url)
|
|
119
|
+
uri = parsed_uri(request_url)
|
|
120
|
+
uri && uri.path.to_s.end_with?("/chat/completions")
|
|
76
121
|
end
|
|
77
122
|
|
|
78
123
|
def detect_stream_usage(events)
|
|
79
124
|
find_event_value(events, reverse: true) do |data|
|
|
80
|
-
usage = data["usage"] || data.dig("response", "usage")
|
|
125
|
+
usage = data["usage"] || data.dig("response", "usage") || data.dig("chunk", "usage")
|
|
81
126
|
usage if usage.is_a?(Hash)
|
|
82
127
|
end
|
|
83
128
|
end
|
|
84
129
|
|
|
85
130
|
def stream_pricing_mode(events)
|
|
86
131
|
find_event_value(events, reverse: true) do |data|
|
|
87
|
-
data["service_tier"] || data.dig("response", "service_tier")
|
|
132
|
+
data["service_tier"] || data.dig("response", "service_tier") || data.dig("chunk", "service_tier")
|
|
88
133
|
end
|
|
89
134
|
end
|
|
90
135
|
|
|
91
136
|
def pricing_mode(request_url:, model:, service_tier:)
|
|
92
|
-
|
|
93
|
-
modes << "data_residency" if openai_regional_processing?(request_url: request_url, model: model)
|
|
94
|
-
modes = modes.compact.uniq
|
|
95
|
-
modes.empty? ? nil : modes.join("_")
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
def openai_regional_processing?(request_url:, model:)
|
|
99
|
-
uri = parsed_uri(request_url)
|
|
100
|
-
return false unless uri&.host.to_s.downcase.match?(OPENAI_DATA_RESIDENCY_HOST_PATTERN)
|
|
101
|
-
|
|
102
|
-
openai_data_residency_model?(model)
|
|
137
|
+
OpenaiUsage.combined_pricing_mode(host: parsed_uri(request_url)&.host, model: model, service_tier: service_tier)
|
|
103
138
|
end
|
|
104
139
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
end
|
|
140
|
+
IMAGE_OUTPUT_MODEL_PATTERN = /\Agpt-image-/i
|
|
141
|
+
private_constant :IMAGE_OUTPUT_MODEL_PATTERN
|
|
108
142
|
|
|
109
|
-
def token_usage(usage:, cache_read:)
|
|
143
|
+
def token_usage(usage:, cache_read:, model: nil)
|
|
110
144
|
audio_input = audio_input_tokens(usage)
|
|
111
145
|
audio_output = audio_output_tokens(usage)
|
|
146
|
+
image_input = image_input_tokens(usage)
|
|
147
|
+
image_output_details = image_output_tokens(usage)
|
|
148
|
+
text_output_details = text_output_tokens(usage)
|
|
149
|
+
raw_output = (usage["completion_tokens"] || usage["output_tokens"]).to_i
|
|
150
|
+
image_output, regular_output_remainder = split_stream_image_output(
|
|
151
|
+
raw_output: raw_output, image_output_details: image_output_details,
|
|
152
|
+
text_output_details: text_output_details, audio_output: audio_output,
|
|
153
|
+
default_to_image: model.to_s.match?(IMAGE_OUTPUT_MODEL_PATTERN)
|
|
154
|
+
)
|
|
112
155
|
|
|
113
156
|
TokenUsage.build(
|
|
114
|
-
input_tokens: regular_input_tokens(
|
|
115
|
-
|
|
157
|
+
input_tokens: regular_input_tokens(
|
|
158
|
+
usage: usage, cache_read: cache_read, audio_input: audio_input, image_input: image_input
|
|
159
|
+
),
|
|
160
|
+
output_tokens: regular_output_remainder,
|
|
116
161
|
total_tokens: usage["total_tokens"],
|
|
117
162
|
cache_read_input_tokens: cache_read,
|
|
118
163
|
audio_input_tokens: audio_input,
|
|
119
164
|
audio_output_tokens: audio_output,
|
|
165
|
+
image_input_tokens: image_input,
|
|
166
|
+
image_output_tokens: image_output,
|
|
120
167
|
hidden_output_tokens: hidden_output_tokens(usage)
|
|
121
168
|
)
|
|
122
169
|
end
|
|
123
170
|
|
|
124
|
-
def
|
|
125
|
-
|
|
171
|
+
def split_stream_image_output(raw_output:, image_output_details:, text_output_details:, audio_output:,
|
|
172
|
+
default_to_image: false)
|
|
173
|
+
if image_output_details.zero? && text_output_details.zero?
|
|
174
|
+
remainder = [raw_output - audio_output, 0].max
|
|
175
|
+
return default_to_image ? [remainder, 0] : [0, remainder]
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
text_output = text_output_details
|
|
179
|
+
text_output = [raw_output - image_output_details - audio_output, 0].max if text_output.zero?
|
|
180
|
+
[image_output_details, text_output]
|
|
126
181
|
end
|
|
127
182
|
|
|
128
|
-
def
|
|
129
|
-
|
|
183
|
+
def regular_input_tokens(usage:, cache_read:, audio_input:, image_input:)
|
|
184
|
+
raw = (usage["prompt_tokens"] || usage["input_tokens"]).to_i
|
|
185
|
+
[raw - cache_read - audio_input - image_input, 0].max
|
|
130
186
|
end
|
|
131
187
|
|
|
132
188
|
def cache_read_input_tokens(usage)
|
|
@@ -149,6 +205,21 @@ module LlmCostTracker
|
|
|
149
205
|
details["audio_tokens"].to_i
|
|
150
206
|
end
|
|
151
207
|
|
|
208
|
+
def image_input_tokens(usage)
|
|
209
|
+
details = input_token_details(usage)
|
|
210
|
+
details["image_tokens"].to_i
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
def image_output_tokens(usage)
|
|
214
|
+
details = output_token_details(usage)
|
|
215
|
+
details["image_tokens"].to_i
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
def text_output_tokens(usage)
|
|
219
|
+
details = output_token_details(usage)
|
|
220
|
+
details["text_tokens"].to_i
|
|
221
|
+
end
|
|
222
|
+
|
|
152
223
|
def input_token_details(usage)
|
|
153
224
|
usage["prompt_tokens_details"] || usage["input_tokens_details"] || usage["input_token_details"] || {}
|
|
154
225
|
end
|