llm_cost_tracker 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +108 -0
  3. data/README.md +12 -5
  4. data/app/assets/llm_cost_tracker/application.css +65 -5
  5. data/app/controllers/llm_cost_tracker/application_controller.rb +25 -33
  6. data/app/controllers/llm_cost_tracker/assets_controller.rb +1 -1
  7. data/app/controllers/llm_cost_tracker/calls_controller.rb +5 -7
  8. data/app/controllers/llm_cost_tracker/data_quality_controller.rb +4 -0
  9. data/app/controllers/llm_cost_tracker/reconciliation_controller.rb +106 -0
  10. data/app/controllers/llm_cost_tracker/tags_controller.rb +15 -1
  11. data/app/helpers/llm_cost_tracker/application_helper.rb +10 -0
  12. data/app/helpers/llm_cost_tracker/inline_style_helper.rb +28 -0
  13. data/app/helpers/llm_cost_tracker/reconciliation_helper.rb +13 -0
  14. data/app/helpers/llm_cost_tracker/token_usage_helper.rb +5 -1
  15. data/app/models/llm_cost_tracker/call.rb +0 -3
  16. data/app/models/llm_cost_tracker/call_line_item.rb +1 -5
  17. data/app/models/llm_cost_tracker/call_rollup.rb +0 -3
  18. data/app/models/llm_cost_tracker/call_tag.rb +0 -4
  19. data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +0 -4
  20. data/app/models/llm_cost_tracker/ingestion/lease.rb +0 -3
  21. data/app/models/llm_cost_tracker/provider_invoice.rb +7 -3
  22. data/app/models/llm_cost_tracker/provider_invoice_import.rb +24 -0
  23. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +33 -4
  24. data/app/services/llm_cost_tracker/dashboard/filter.rb +6 -4
  25. data/app/views/layouts/llm_cost_tracker/application.html.erb +6 -1
  26. data/app/views/llm_cost_tracker/calls/show.html.erb +25 -40
  27. data/app/views/llm_cost_tracker/dashboard/index.html.erb +9 -9
  28. data/app/views/llm_cost_tracker/data_quality/index.html.erb +91 -52
  29. data/app/views/llm_cost_tracker/reconciliation/index.html.erb +183 -0
  30. data/app/views/llm_cost_tracker/shared/_bar.html.erb +1 -1
  31. data/app/views/llm_cost_tracker/shared/_filters.html.erb +3 -0
  32. data/app/views/llm_cost_tracker/shared/_metric_stack.html.erb +1 -1
  33. data/app/views/llm_cost_tracker/tags/show.html.erb +60 -0
  34. data/config/routes.rb +3 -2
  35. data/lib/llm_cost_tracker/billing/components.rb +45 -3
  36. data/lib/llm_cost_tracker/billing/components.yml +71 -0
  37. data/lib/llm_cost_tracker/billing/line_item.rb +1 -1
  38. data/lib/llm_cost_tracker/budget.rb +4 -2
  39. data/lib/llm_cost_tracker/capture/stream_collector.rb +93 -20
  40. data/lib/llm_cost_tracker/capture/stream_tracker.rb +40 -5
  41. data/lib/llm_cost_tracker/configuration.rb +53 -1
  42. data/lib/llm_cost_tracker/dashboard_setup_state.rb +109 -0
  43. data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +2 -0
  44. data/lib/llm_cost_tracker/doctor/ingestion_check.rb +26 -0
  45. data/lib/llm_cost_tracker/doctor/invoice_reconciliation_check.rb +164 -0
  46. data/lib/llm_cost_tracker/doctor/schema_check.rb +5 -2
  47. data/lib/llm_cost_tracker/doctor.rb +72 -3
  48. data/lib/llm_cost_tracker/engine.rb +9 -0
  49. data/lib/llm_cost_tracker/event.rb +1 -1
  50. data/lib/llm_cost_tracker/generators/llm_cost_tracker/call_rollups_generator.rb +43 -0
  51. data/lib/llm_cost_tracker/generators/llm_cost_tracker/durable_ingestion_generator.rb +43 -0
  52. data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +13 -3
  53. data/lib/llm_cost_tracker/generators/llm_cost_tracker/reconciliation_generator.rb +34 -0
  54. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_call_rollups.rb.erb +15 -0
  55. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +5 -58
  56. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_durable_ingestion.rb.erb +29 -0
  57. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_reconciliation.rb.erb +55 -0
  58. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +28 -25
  59. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_rollups_provider.rb.erb +20 -0
  60. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_tags_key_value_index.rb.erb +32 -0
  61. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_image_tokens.rb.erb +18 -0
  62. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_rollups_provider_generator.rb +38 -0
  63. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_tags_key_value_index_generator.rb +30 -0
  64. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_image_tokens_generator.rb +29 -0
  65. data/lib/llm_cost_tracker/ingestion/inbox.rb +0 -1
  66. data/lib/llm_cost_tracker/ingestion/inline.rb +22 -0
  67. data/lib/llm_cost_tracker/ingestion/worker.rb +10 -2
  68. data/lib/llm_cost_tracker/ingestion.rb +48 -10
  69. data/lib/llm_cost_tracker/integrations/anthropic.rb +24 -5
  70. data/lib/llm_cost_tracker/integrations/base.rb +22 -5
  71. data/lib/llm_cost_tracker/integrations/openai.rb +300 -66
  72. data/lib/llm_cost_tracker/integrations/ruby_llm.rb +105 -6
  73. data/lib/llm_cost_tracker/integrations.rb +19 -1
  74. data/lib/llm_cost_tracker/ledger/period/totals.rb +21 -5
  75. data/lib/llm_cost_tracker/ledger/rollups.rb +24 -10
  76. data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +30 -1
  77. data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +3 -3
  78. data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +17 -2
  79. data/lib/llm_cost_tracker/ledger/schema/calls.rb +2 -0
  80. data/lib/llm_cost_tracker/ledger/schema/ingestion_inbox_entries.rb +47 -0
  81. data/lib/llm_cost_tracker/ledger/schema/ingestion_leases.rb +42 -0
  82. data/lib/llm_cost_tracker/ledger/schema/provider_invoice_imports.rb +46 -0
  83. data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +2 -2
  84. data/lib/llm_cost_tracker/ledger/store.rb +14 -14
  85. data/lib/llm_cost_tracker/ledger/tags/encoding.rb +37 -0
  86. data/lib/llm_cost_tracker/ledger/tags/query.rb +2 -1
  87. data/lib/llm_cost_tracker/ledger.rb +2 -1
  88. data/lib/llm_cost_tracker/masking.rb +39 -0
  89. data/lib/llm_cost_tracker/middleware/faraday.rb +88 -29
  90. data/lib/llm_cost_tracker/parsers/anthropic.rb +22 -7
  91. data/lib/llm_cost_tracker/parsers/base.rb +5 -1
  92. data/lib/llm_cost_tracker/parsers/gemini.rb +4 -0
  93. data/lib/llm_cost_tracker/parsers/openai.rb +16 -2
  94. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +5 -1
  95. data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +49 -10
  96. data/lib/llm_cost_tracker/parsers/openai_usage.rb +124 -53
  97. data/lib/llm_cost_tracker/prices.json +110 -19
  98. data/lib/llm_cost_tracker/pricing/effective_prices.rb +5 -36
  99. data/lib/llm_cost_tracker/pricing/lookup.rb +36 -3
  100. data/lib/llm_cost_tracker/pricing/mode.rb +76 -0
  101. data/lib/llm_cost_tracker/pricing/registry.rb +3 -1
  102. data/lib/llm_cost_tracker/pricing/service_charges.rb +9 -3
  103. data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +50 -1
  104. data/lib/llm_cost_tracker/pricing/sync.rb +3 -1
  105. data/lib/llm_cost_tracker/pricing.rb +47 -19
  106. data/lib/llm_cost_tracker/railtie.rb +6 -0
  107. data/lib/llm_cost_tracker/reconcile_tasks.rb +134 -0
  108. data/lib/llm_cost_tracker/reconciliation/diff.rb +428 -0
  109. data/lib/llm_cost_tracker/reconciliation/diff_result.rb +48 -0
  110. data/lib/llm_cost_tracker/reconciliation/import_result.rb +19 -0
  111. data/lib/llm_cost_tracker/reconciliation/importer.rb +253 -0
  112. data/lib/llm_cost_tracker/reconciliation/sources/anthropic_usage.rb +171 -0
  113. data/lib/llm_cost_tracker/reconciliation/sources/fingerprint.rb +20 -0
  114. data/lib/llm_cost_tracker/reconciliation/sources/openai_usage.rb +142 -0
  115. data/lib/llm_cost_tracker/reconciliation.rb +118 -0
  116. data/lib/llm_cost_tracker/report/data.rb +4 -1
  117. data/lib/llm_cost_tracker/retention.rb +15 -2
  118. data/lib/llm_cost_tracker/tags/context.rb +3 -4
  119. data/lib/llm_cost_tracker/tags/sanitizer.rb +60 -4
  120. data/lib/llm_cost_tracker/token_usage.rb +10 -2
  121. data/lib/llm_cost_tracker/tracker.rb +45 -18
  122. data/lib/llm_cost_tracker/version.rb +1 -1
  123. data/lib/llm_cost_tracker.rb +9 -0
  124. data/lib/tasks/llm_cost_tracker.rake +25 -2
  125. metadata +36 -1
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "faraday"
4
4
  require "json"
5
+ require "stringio"
5
6
  require "uri"
6
7
 
7
8
  require_relative "../logging"
@@ -23,28 +24,82 @@ module LlmCostTracker
23
24
  request_body = read_body(request_env.body)
24
25
  parser = Parsers.find_for(request_url)
25
26
  streaming = parser&.streaming_request?(request_url, request_body)
27
+ request_body = inject_stream_usage_flag(request_env, parser, request_url) if streaming
26
28
  stream_buffer = install_stream_tap(request_env) if streaming
27
29
 
28
30
  Tracker.enforce_budget! if parser
29
31
  context_tags, metadata = tag_snapshot(request_env) if parser
30
32
  started_at = LlmCostTracker::Timing.now_monotonic
31
33
 
34
+ invoke_app_with_capture(
35
+ request_env: request_env, parser: parser, request_url: request_url,
36
+ request_body: request_body, streaming: streaming, stream_buffer: stream_buffer,
37
+ context_tags: context_tags, metadata: metadata, started_at: started_at
38
+ )
39
+ end
40
+
41
+ private
42
+
43
+ def invoke_app_with_capture(request_env:, parser:, request_url:, request_body:, streaming:,
44
+ stream_buffer:, context_tags:, metadata:, started_at:)
45
+ response_received = false
32
46
  @app.call(request_env).on_complete do |response_env|
47
+ response_received = true
33
48
  process(
34
- parser: parser,
35
- request_url: request_url,
36
- request_body: request_body,
37
- response_env: response_env,
49
+ parser: parser, request_url: request_url, request_body: request_body,
50
+ response_env: response_env, latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
51
+ streaming: streaming, stream_buffer: stream_buffer,
52
+ context_tags: context_tags, metadata: metadata
53
+ )
54
+ end
55
+ rescue StandardError => e
56
+ if streaming && parser && !response_received
57
+ process_interrupted_stream(
58
+ parser: parser, request_url: request_url, request_body: request_body,
38
59
  latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
39
- streaming: streaming,
40
- stream_buffer: stream_buffer,
41
- context_tags: context_tags,
42
- metadata: metadata
60
+ context_tags: context_tags, metadata: metadata, error: e
43
61
  )
44
62
  end
63
+ raise
45
64
  end
46
65
 
47
- private
66
+ def inject_stream_usage_flag(request_env, parser, request_url)
67
+ body_string = read_body(request_env.body)
68
+ return body_string unless LlmCostTracker.configuration.auto_enable_stream_usage
69
+ return body_string unless parser&.auto_enable_stream_usage?(request_url)
70
+
71
+ body = JSON.parse(body_string)
72
+ return body_string if body["stream_options"].is_a?(Hash) && body["stream_options"].key?("include_usage")
73
+
74
+ body["stream_options"] = (body["stream_options"] || {}).merge("include_usage" => true)
75
+ new_body = body.to_json
76
+ request_env.body = new_body
77
+ new_body
78
+ end
79
+
80
+ def process_interrupted_stream(parser:, request_url:, request_body:, latency_ms:,
81
+ context_tags:, metadata:, error:)
82
+ request = parser.safe_json_parse(request_body)
83
+ capture = UsageCapture.build(
84
+ provider: parser.provider_for(request_url),
85
+ model: request["model"] || UsageCapture::UNKNOWN_MODEL,
86
+ token_usage: TokenUsage.build(input_tokens: 0, output_tokens: 0, total_tokens: 0),
87
+ stream: true,
88
+ usage_source: :unknown
89
+ )
90
+ merged_metadata = (metadata || {}).merge(
91
+ stream_interrupted: true,
92
+ stream_interrupted_error: "#{error.class}: #{error.message}"
93
+ )
94
+ Tracker.record(
95
+ capture: capture,
96
+ latency_ms: latency_ms,
97
+ metadata: merged_metadata,
98
+ context_tags: context_tags
99
+ )
100
+ rescue StandardError => e
101
+ Logging.warn("Error recording interrupted stream: #{e.class}: #{e.message}")
102
+ end
48
103
 
49
104
  def process(parser:, request_url:, request_body:, response_env:,
50
105
  latency_ms:, streaming:, stream_buffer:, context_tags:, metadata:)
@@ -102,21 +157,14 @@ module LlmCostTracker
102
157
  end
103
158
 
104
159
  def parse_stream(parser:, request_url:, request_body:, response_env:, stream_buffer:)
105
- if stream_buffer&.dig(:overflowed)
106
- Logging.warn(capture_warning(request_url, stream_buffer))
107
- return parser.parse_stream(
108
- request_url: request_url,
109
- request_body: request_body,
110
- response_status: response_env.status,
111
- response_headers: response_env.response_headers
112
- )
113
- end
160
+ overflowed = stream_buffer&.dig(:overflowed) == true
161
+ Logging.warn(capture_warning(request_url, stream_buffer)) if overflowed
114
162
 
115
163
  body = stream_buffer&.dig(:buffer)&.string
116
164
  body = read_body(response_env.body) if body.blank?
117
165
 
118
166
  if body.blank?
119
- Logging.warn(capture_warning(request_url, stream_buffer))
167
+ Logging.warn(capture_warning(request_url, stream_buffer)) unless overflowed
120
168
  return parser.parse_stream(
121
169
  request_url: request_url,
122
170
  request_body: request_body,
@@ -125,7 +173,7 @@ module LlmCostTracker
125
173
  )
126
174
  end
127
175
 
128
- events = Parsers::SSE.parse(body)
176
+ events = overflowed ? [] : Parsers::SSE.parse(body)
129
177
  parser.parse_stream(
130
178
  request_url: request_url,
131
179
  request_body: request_body,
@@ -135,6 +183,17 @@ module LlmCostTracker
135
183
  )
136
184
  end
137
185
 
186
+ def forward_on_data_chunk(callable, chunk, size, env)
187
+ arity = callable.arity
188
+ return callable.call(chunk, size, env) if arity.negative?
189
+
190
+ case arity
191
+ when 0, 1 then callable.call(chunk)
192
+ when 2 then callable.call(chunk, size)
193
+ else callable.call(chunk, size, env)
194
+ end
195
+ end
196
+
138
197
  def install_stream_tap(request_env)
139
198
  request = request_env.request
140
199
  return nil unless request
@@ -145,16 +204,16 @@ module LlmCostTracker
145
204
  state = { buffer: StringIO.new, bytes: 0, overflowed: false }
146
205
  request.on_data = proc do |chunk, size, env|
147
206
  chunk = chunk.to_s
148
- unless state[:overflowed]
149
- if state[:bytes] + chunk.bytesize <= Capture::Stream::LIMIT_BYTES
150
- state[:buffer] << chunk
151
- state[:bytes] += chunk.bytesize
152
- else
153
- state[:overflowed] = true
154
- state[:buffer] = nil
155
- end
207
+ remaining = Capture::Stream::LIMIT_BYTES - state[:bytes]
208
+ if chunk.bytesize <= remaining
209
+ state[:buffer] << chunk
210
+ state[:bytes] += chunk.bytesize
211
+ else
212
+ state[:buffer] << chunk.byteslice(0, remaining) if remaining.positive?
213
+ state[:bytes] += [remaining, 0].max
214
+ state[:overflowed] = true
156
215
  end
157
- original.call(chunk, size, env)
216
+ forward_on_data_chunk(original, chunk, size, env)
158
217
  end
159
218
  state
160
219
  rescue StandardError => e
@@ -61,20 +61,28 @@ module LlmCostTracker
61
61
  end
62
62
  end
63
63
 
64
+ def provider_for(_request_url)
65
+ "anthropic"
66
+ end
67
+
68
+ DATA_RESIDENCY_GEOS = %w[us].freeze
69
+ STANDARD_EQUIVALENT_SERVICE_TIERS = %w[standard standard_only priority].freeze
70
+ private_constant :DATA_RESIDENCY_GEOS, :STANDARD_EQUIVALENT_SERVICE_TIERS
71
+
64
72
  private
65
73
 
66
74
  def stream_usage(events)
67
- start_usage = find_event_value(events, reverse: true) do |data|
68
- data.dig("message", "usage") if data["type"] == "message_start"
69
- end
70
75
  latest_delta = find_event_value(events, reverse: true) do |data|
71
76
  data["usage"] if data["type"] == "message_delta" && data["usage"].is_a?(Hash)
72
77
  end
78
+ return nil unless latest_delta
73
79
 
74
- return nil unless start_usage || latest_delta
80
+ start_usage = find_event_value(events, reverse: true) do |data|
81
+ data.dig("message", "usage") if data["type"] == "message_start"
82
+ end
75
83
 
76
- (start_usage || {}).merge(latest_delta || {}) do |_key, start_val, delta_val|
77
- delta_val.nil? ? start_val : delta_val
84
+ (start_usage || {}).merge(latest_delta) do |_key, start_val, delta_val|
85
+ delta_val || start_val
78
86
  end
79
87
  end
80
88
 
@@ -103,6 +111,11 @@ module LlmCostTracker
103
111
  quantity: server_tool_use["web_search_requests"],
104
112
  provider_field: "usage.server_tool_use.web_search_requests"
105
113
  ),
114
+ service_line_item(
115
+ component_key: :web_fetch_request,
116
+ quantity: server_tool_use["web_fetch_requests"],
117
+ provider_field: "usage.server_tool_use.web_fetch_requests"
118
+ ),
106
119
  service_line_item(
107
120
  component_key: :code_execution_request,
108
121
  quantity: server_tool_use["code_execution_requests"],
@@ -164,10 +177,12 @@ module LlmCostTracker
164
177
  service_tier = usage&.fetch("service_tier", nil) ||
165
178
  response&.fetch("service_tier", nil) ||
166
179
  request["service_tier"]
180
+ service_tier = nil if STANDARD_EQUIVALENT_SERVICE_TIERS.include?(service_tier.to_s)
167
181
 
168
182
  modes << Pricing.normalize_mode(speed)
169
183
  modes << Pricing.normalize_mode(service_tier)
170
- modes << "data_residency" if inference_geo(request: request, response: response, usage: usage) == "us"
184
+ geo = inference_geo(request: request, response: response, usage: usage).downcase
185
+ modes << "data_residency" if DATA_RESIDENCY_GEOS.include?(geo)
171
186
 
172
187
  modes = modes.compact.uniq
173
188
  modes.empty? ? nil : modes.join("_")
@@ -32,7 +32,9 @@ module LlmCostTracker
32
32
  nil
33
33
  end
34
34
 
35
- private
35
+ def auto_enable_stream_usage?(_request_url)
36
+ false
37
+ end
36
38
 
37
39
  def safe_json_parse(body)
38
40
  return {} if body.blank?
@@ -42,6 +44,8 @@ module LlmCostTracker
42
44
  {}
43
45
  end
44
46
 
47
+ private
48
+
45
49
  def uri_matches?(url)
46
50
  uri = parsed_uri(url)
47
51
  uri ? yield(uri) : false
@@ -75,6 +75,10 @@ module LlmCostTracker
75
75
  end
76
76
  end
77
77
 
78
+ def provider_for(_request_url)
79
+ "gemini"
80
+ end
81
+
78
82
  private
79
83
 
80
84
  def build_usage_capture(request_url:, usage:, usage_source:, stream: false, provider_response_id: nil,
@@ -21,7 +21,19 @@ module LlmCostTracker
21
21
  gb.api.openai.com
22
22
  ae.api.openai.com
23
23
  ].freeze
24
- TRACKED_PATHS = %w[/v1/chat/completions /v1/completions /v1/embeddings /v1/responses].freeze
24
+ TRACKED_PATHS = %w[
25
+ /v1/chat/completions
26
+ /v1/completions
27
+ /v1/embeddings
28
+ /v1/responses
29
+ /v1/images/generations
30
+ /v1/images/edits
31
+ /v1/images/variations
32
+ /v1/audio/transcriptions
33
+ /v1/audio/translations
34
+ /v1/audio/speech
35
+ /v1/moderations
36
+ ].freeze
25
37
 
26
38
  def match?(url)
27
39
  match_uri?(url, hosts: HOSTS, exact_paths: TRACKED_PATHS)
@@ -49,7 +61,9 @@ module LlmCostTracker
49
61
  )
50
62
  end
51
63
 
52
- private
64
+ def auto_enable_stream_usage?(request_url)
65
+ openai_chat_completions_url?(request_url)
66
+ end
53
67
 
54
68
  def provider_for(_request_url)
55
69
  "openai"
@@ -47,13 +47,17 @@ module LlmCostTracker
47
47
  )
48
48
  end
49
49
 
50
- private
50
+ def auto_enable_stream_usage?(request_url)
51
+ openai_chat_completions_url?(request_url)
52
+ end
51
53
 
52
54
  def provider_for(request_url)
53
55
  uri = parsed_uri(request_url)
54
56
  provider_for_uri(uri) || "openai_compatible"
55
57
  end
56
58
 
59
+ private
60
+
57
61
  def provider_for_uri(uri)
58
62
  return nil unless uri
59
63
 
@@ -8,15 +8,25 @@ module LlmCostTracker
8
8
  RESPONSE_OUTPUT_COMPONENTS = {
9
9
  "web_search_call" => :web_search_request,
10
10
  "file_search_call" => :file_search_call,
11
- "code_interpreter_call" => :container_session
11
+ "code_interpreter_call" => :container_session,
12
+ "mcp_call" => :mcp_call
12
13
  }.freeze
13
14
 
15
+ REASONING_MODEL_PATTERNS = [
16
+ /\Agpt-5(\b|[\d.-])/i,
17
+ /\Ao\d+(\b|[\d.-])/i
18
+ ].freeze
19
+ NON_REASONING_GPT5_PATTERN = /\Agpt-5(?:\.\d+)?-chat\b/i
20
+ private_constant :NON_REASONING_GPT5_PATTERN
21
+
14
22
  module_function
15
23
 
16
- def line_items_from_output(output_items)
24
+ def line_items_from_output(output_items, request: nil, model: nil)
17
25
  deduped = {}
18
26
  Array(output_items).each { |item| store_output_item(deduped, item) }
19
- deduped.values.filter_map { |item| build_line_item(item) }
27
+ deduped.values
28
+ .select { |item| billable?(item) }
29
+ .filter_map { |item| build_line_item(item, request: request, model: model) }
20
30
  end
21
31
 
22
32
  def billable?(item)
@@ -31,7 +41,7 @@ module LlmCostTracker
31
41
  end
32
42
 
33
43
  def store_output_item(output_items, item)
34
- return unless billable?(item)
44
+ return unless item.is_a?(Hash) && RESPONSE_OUTPUT_COMPONENTS.key?(item["type"])
35
45
 
36
46
  component = RESPONSE_OUTPUT_COMPONENTS[item["type"]]
37
47
  key = if component == :container_session && item["container_id"]
@@ -42,8 +52,10 @@ module LlmCostTracker
42
52
  output_items[key] = item
43
53
  end
44
54
 
45
- def build_line_item(item)
46
- component_key = RESPONSE_OUTPUT_COMPONENTS[item["type"]]
55
+ def build_line_item(item, request: nil, model: nil)
56
+ return nil unless item.is_a?(Hash)
57
+
58
+ component_key = component_key_for(item, request: request, model: model)
47
59
  return nil unless component_key
48
60
 
49
61
  provider_item_id = if component_key == :container_session
@@ -62,6 +74,33 @@ module LlmCostTracker
62
74
  )
63
75
  end
64
76
 
77
+ def component_key_for(item, request:, model:)
78
+ component = RESPONSE_OUTPUT_COMPONENTS[item["type"]]
79
+ return component unless component == :web_search_request
80
+ return component unless web_search_preview_used?(request)
81
+
82
+ reasoning_model?(model) ? :web_search_preview_request_reasoning : :web_search_preview_request_non_reasoning
83
+ end
84
+
85
+ def web_search_preview_used?(request)
86
+ tools = request && (request[:tools] || request["tools"])
87
+ return false unless tools.respond_to?(:each)
88
+
89
+ tools.any? do |tool|
90
+ type = tool.is_a?(Hash) ? (tool[:type] || tool["type"]) : tool
91
+ type.to_s.include?("web_search_preview")
92
+ end
93
+ end
94
+
95
+ def reasoning_model?(model)
96
+ return false unless model
97
+
98
+ name = model.to_s.split("/", 2).last
99
+ return false if NON_REASONING_GPT5_PATTERN.match?(name)
100
+
101
+ REASONING_MODEL_PATTERNS.any? { |pattern| pattern.match?(name) }
102
+ end
103
+
65
104
  def line_item_details(item)
66
105
  {
67
106
  "status" => item["status"],
@@ -70,17 +109,17 @@ module LlmCostTracker
70
109
  }.compact
71
110
  end
72
111
 
73
- def openai_service_line_items(response)
74
- line_items_from_output(response["output"])
112
+ def openai_service_line_items(response, request: nil)
113
+ line_items_from_output(response["output"], request: request, model: response["model"])
75
114
  end
76
115
 
77
- def openai_stream_service_line_items(events)
116
+ def openai_stream_service_line_items(events, request: nil, model: nil)
78
117
  output_items = []
79
118
  each_event_data(events) do |data|
80
119
  output_items.concat(Array(data.dig("response", "output")))
81
120
  output_items << data["item"] if data["item"]
82
121
  end
83
- line_items_from_output(output_items)
122
+ line_items_from_output(output_items, request: request, model: model)
84
123
  end
85
124
  end
86
125
  end
@@ -9,6 +9,25 @@ module LlmCostTracker
9
9
 
10
10
  OPENAI_DATA_RESIDENCY_HOST_PATTERN = /\A[a-z]{2,3}\.api\.openai\.com\z/
11
11
 
12
+ class << self
13
+ def combined_pricing_mode(host:, model:, service_tier:)
14
+ modes = [Pricing.normalize_mode(service_tier)]
15
+ modes << "data_residency" if regional_processing?(host: host, model: model)
16
+ modes = modes.compact.uniq
17
+ modes.empty? ? nil : modes.join("_")
18
+ end
19
+
20
+ def regional_processing?(host:, model:)
21
+ host.to_s.downcase.match?(OPENAI_DATA_RESIDENCY_HOST_PATTERN) && data_residency_model?(model)
22
+ end
23
+
24
+ def data_residency_model?(model)
25
+ model.to_s.match?(
26
+ /\Agpt-5\.(?:4|5)(?:-(?:mini|nano|pro|codex(?:-mini|-max)?))?(?:-\d{4}-\d{2}-\d{2})?\z/
27
+ )
28
+ end
29
+ end
30
+
12
31
  private
13
32
 
14
33
  def parse_openai_usage(request_url:, request_body:, response_status:, response_body:)
@@ -32,9 +51,9 @@ module LlmCostTracker
32
51
  service_tier: response["service_tier"] || request["service_tier"]
33
52
  ),
34
53
  model: model,
35
- token_usage: token_usage(usage: usage, cache_read: cache_read),
54
+ token_usage: token_usage(usage: usage, cache_read: cache_read, model: model),
36
55
  usage_source: :response,
37
- service_line_items: openai_service_line_items(response)
56
+ service_line_items: openai_service_line_items(response, request: request)
38
57
  )
39
58
  end
40
59
 
@@ -42,91 +61,128 @@ module LlmCostTracker
42
61
  return nil unless response_status == 200
43
62
 
44
63
  request = safe_json_parse(request_body)
45
- model = find_event_value(events) { |data| data["model"] || data.dig("response", "model") } || request["model"]
46
64
  usage = detect_stream_usage(events)
47
- response_id = find_event_value(events) { |data| data["id"] || data.dig("response", "id") }
48
- pricing_mode = pricing_mode(
49
- request_url: request_url,
65
+ context = stream_capture_context(events: events, request: request, request_url: request_url)
66
+
67
+ return build_known_stream_usage(usage: usage, **context) if usage
68
+
69
+ warn_missing_stream_usage(request_url: request_url, request: request)
70
+ build_unknown_stream_usage(**context)
71
+ end
72
+
73
+ def stream_capture_context(events:, request:, request_url:)
74
+ model = find_event_value(events) do |data|
75
+ data["model"] || data.dig("response", "model") || data.dig("chunk", "model")
76
+ end || request["model"]
77
+ {
78
+ provider: provider_for(request_url),
50
79
  model: model,
51
- service_tier: stream_pricing_mode(events) || request["service_tier"]
52
- )
53
- service_line_items = openai_stream_service_line_items(events)
54
-
55
- if usage
56
- cache_read = cache_read_input_tokens(usage)
57
- UsageCapture.build(
58
- provider: provider_for(request_url),
59
- provider_response_id: response_id,
60
- pricing_mode: pricing_mode,
61
- model: model,
62
- token_usage: token_usage(usage: usage, cache_read: cache_read),
63
- stream: true,
64
- usage_source: :stream_final,
65
- service_line_items: service_line_items
66
- )
67
- else
68
- build_unknown_stream_usage(
69
- provider: provider_for(request_url),
80
+ provider_response_id: find_event_value(events) do |data|
81
+ data["id"] || data.dig("response", "id") || data.dig("chunk", "id")
82
+ end,
83
+ pricing_mode: pricing_mode(
84
+ request_url: request_url,
70
85
  model: model,
71
- provider_response_id: response_id,
72
- pricing_mode: pricing_mode,
73
- service_line_items: service_line_items
74
- )
75
- end
86
+ service_tier: stream_pricing_mode(events) || request["service_tier"]
87
+ ),
88
+ service_line_items: openai_stream_service_line_items(events, request: request, model: model)
89
+ }
90
+ end
91
+
92
+ def build_known_stream_usage(usage:, provider:, model:, provider_response_id:, pricing_mode:, service_line_items:)
93
+ cache_read = cache_read_input_tokens(usage)
94
+ UsageCapture.build(
95
+ provider: provider,
96
+ provider_response_id: provider_response_id,
97
+ pricing_mode: pricing_mode,
98
+ model: model,
99
+ token_usage: token_usage(usage: usage, cache_read: cache_read, model: model),
100
+ stream: true,
101
+ usage_source: :stream_final,
102
+ service_line_items: service_line_items
103
+ )
104
+ end
105
+
106
+ def warn_missing_stream_usage(request_url:, request:)
107
+ return unless request.is_a?(Hash) && request["stream"]
108
+ return unless openai_chat_completions_url?(request_url)
109
+ return if request.dig("stream_options", "include_usage")
110
+
111
+ Logging.warn(
112
+ "OpenAI-compatible chat-completions stream finished without a final usage chunk. " \
113
+ "Set `stream_options: { include_usage: true }` in your request body so the gem can " \
114
+ "record token counts. This call was stored with usage_source=unknown."
115
+ )
116
+ end
117
+
118
+ def openai_chat_completions_url?(request_url)
119
+ uri = parsed_uri(request_url)
120
+ uri && uri.path.to_s.end_with?("/chat/completions")
76
121
  end
77
122
 
78
123
  def detect_stream_usage(events)
79
124
  find_event_value(events, reverse: true) do |data|
80
- usage = data["usage"] || data.dig("response", "usage")
125
+ usage = data["usage"] || data.dig("response", "usage") || data.dig("chunk", "usage")
81
126
  usage if usage.is_a?(Hash)
82
127
  end
83
128
  end
84
129
 
85
130
  def stream_pricing_mode(events)
86
131
  find_event_value(events, reverse: true) do |data|
87
- data["service_tier"] || data.dig("response", "service_tier")
132
+ data["service_tier"] || data.dig("response", "service_tier") || data.dig("chunk", "service_tier")
88
133
  end
89
134
  end
90
135
 
91
136
  def pricing_mode(request_url:, model:, service_tier:)
92
- modes = [Pricing.normalize_mode(service_tier)]
93
- modes << "data_residency" if openai_regional_processing?(request_url: request_url, model: model)
94
- modes = modes.compact.uniq
95
- modes.empty? ? nil : modes.join("_")
96
- end
97
-
98
- def openai_regional_processing?(request_url:, model:)
99
- uri = parsed_uri(request_url)
100
- return false unless uri&.host.to_s.downcase.match?(OPENAI_DATA_RESIDENCY_HOST_PATTERN)
101
-
102
- openai_data_residency_model?(model)
137
+ OpenaiUsage.combined_pricing_mode(host: parsed_uri(request_url)&.host, model: model, service_tier: service_tier)
103
138
  end
104
139
 
105
- def openai_data_residency_model?(model)
106
- model.to_s.match?(/\Agpt-5\.(?:4|5)(?:-(?:mini|nano|pro))?(?:-\d{4}-\d{2}-\d{2})?\z/)
107
- end
140
+ IMAGE_OUTPUT_MODEL_PATTERN = /\Agpt-image-/i
141
+ private_constant :IMAGE_OUTPUT_MODEL_PATTERN
108
142
 
109
- def token_usage(usage:, cache_read:)
143
+ def token_usage(usage:, cache_read:, model: nil)
110
144
  audio_input = audio_input_tokens(usage)
111
145
  audio_output = audio_output_tokens(usage)
146
+ image_input = image_input_tokens(usage)
147
+ image_output_details = image_output_tokens(usage)
148
+ text_output_details = text_output_tokens(usage)
149
+ raw_output = (usage["completion_tokens"] || usage["output_tokens"]).to_i
150
+ image_output, regular_output_remainder = split_stream_image_output(
151
+ raw_output: raw_output, image_output_details: image_output_details,
152
+ text_output_details: text_output_details, audio_output: audio_output,
153
+ default_to_image: model.to_s.match?(IMAGE_OUTPUT_MODEL_PATTERN)
154
+ )
112
155
 
113
156
  TokenUsage.build(
114
- input_tokens: regular_input_tokens(usage: usage, cache_read: cache_read, audio_input: audio_input),
115
- output_tokens: regular_output_tokens(usage: usage, audio_output: audio_output),
157
+ input_tokens: regular_input_tokens(
158
+ usage: usage, cache_read: cache_read, audio_input: audio_input, image_input: image_input
159
+ ),
160
+ output_tokens: regular_output_remainder,
116
161
  total_tokens: usage["total_tokens"],
117
162
  cache_read_input_tokens: cache_read,
118
163
  audio_input_tokens: audio_input,
119
164
  audio_output_tokens: audio_output,
165
+ image_input_tokens: image_input,
166
+ image_output_tokens: image_output,
120
167
  hidden_output_tokens: hidden_output_tokens(usage)
121
168
  )
122
169
  end
123
170
 
124
- def regular_input_tokens(usage:, cache_read:, audio_input:)
125
- [(usage["prompt_tokens"] || usage["input_tokens"]).to_i - cache_read - audio_input, 0].max
171
+ def split_stream_image_output(raw_output:, image_output_details:, text_output_details:, audio_output:,
172
+ default_to_image: false)
173
+ if image_output_details.zero? && text_output_details.zero?
174
+ remainder = [raw_output - audio_output, 0].max
175
+ return default_to_image ? [remainder, 0] : [0, remainder]
176
+ end
177
+
178
+ text_output = text_output_details
179
+ text_output = [raw_output - image_output_details - audio_output, 0].max if text_output.zero?
180
+ [image_output_details, text_output]
126
181
  end
127
182
 
128
- def regular_output_tokens(usage:, audio_output:)
129
- [(usage["completion_tokens"] || usage["output_tokens"]).to_i - audio_output, 0].max
183
+ def regular_input_tokens(usage:, cache_read:, audio_input:, image_input:)
184
+ raw = (usage["prompt_tokens"] || usage["input_tokens"]).to_i
185
+ [raw - cache_read - audio_input - image_input, 0].max
130
186
  end
131
187
 
132
188
  def cache_read_input_tokens(usage)
@@ -149,6 +205,21 @@ module LlmCostTracker
149
205
  details["audio_tokens"].to_i
150
206
  end
151
207
 
208
+ def image_input_tokens(usage)
209
+ details = input_token_details(usage)
210
+ details["image_tokens"].to_i
211
+ end
212
+
213
+ def image_output_tokens(usage)
214
+ details = output_token_details(usage)
215
+ details["image_tokens"].to_i
216
+ end
217
+
218
+ def text_output_tokens(usage)
219
+ details = output_token_details(usage)
220
+ details["text_tokens"].to_i
221
+ end
222
+
152
223
  def input_token_details(usage)
153
224
  usage["prompt_tokens_details"] || usage["input_tokens_details"] || usage["input_token_details"] || {}
154
225
  end