llm_cost_tracker 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +108 -0
  3. data/README.md +12 -5
  4. data/app/assets/llm_cost_tracker/application.css +65 -5
  5. data/app/controllers/llm_cost_tracker/application_controller.rb +25 -33
  6. data/app/controllers/llm_cost_tracker/assets_controller.rb +1 -1
  7. data/app/controllers/llm_cost_tracker/calls_controller.rb +5 -7
  8. data/app/controllers/llm_cost_tracker/data_quality_controller.rb +4 -0
  9. data/app/controllers/llm_cost_tracker/reconciliation_controller.rb +106 -0
  10. data/app/controllers/llm_cost_tracker/tags_controller.rb +15 -1
  11. data/app/helpers/llm_cost_tracker/application_helper.rb +10 -0
  12. data/app/helpers/llm_cost_tracker/inline_style_helper.rb +28 -0
  13. data/app/helpers/llm_cost_tracker/reconciliation_helper.rb +13 -0
  14. data/app/helpers/llm_cost_tracker/token_usage_helper.rb +5 -1
  15. data/app/models/llm_cost_tracker/call.rb +0 -3
  16. data/app/models/llm_cost_tracker/call_line_item.rb +1 -5
  17. data/app/models/llm_cost_tracker/call_rollup.rb +0 -3
  18. data/app/models/llm_cost_tracker/call_tag.rb +0 -4
  19. data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +0 -4
  20. data/app/models/llm_cost_tracker/ingestion/lease.rb +0 -3
  21. data/app/models/llm_cost_tracker/provider_invoice.rb +7 -3
  22. data/app/models/llm_cost_tracker/provider_invoice_import.rb +24 -0
  23. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +33 -4
  24. data/app/services/llm_cost_tracker/dashboard/filter.rb +6 -4
  25. data/app/views/layouts/llm_cost_tracker/application.html.erb +6 -1
  26. data/app/views/llm_cost_tracker/calls/show.html.erb +25 -40
  27. data/app/views/llm_cost_tracker/dashboard/index.html.erb +9 -9
  28. data/app/views/llm_cost_tracker/data_quality/index.html.erb +91 -52
  29. data/app/views/llm_cost_tracker/reconciliation/index.html.erb +183 -0
  30. data/app/views/llm_cost_tracker/shared/_bar.html.erb +1 -1
  31. data/app/views/llm_cost_tracker/shared/_filters.html.erb +3 -0
  32. data/app/views/llm_cost_tracker/shared/_metric_stack.html.erb +1 -1
  33. data/app/views/llm_cost_tracker/tags/show.html.erb +60 -0
  34. data/config/routes.rb +3 -2
  35. data/lib/llm_cost_tracker/billing/components.rb +45 -3
  36. data/lib/llm_cost_tracker/billing/components.yml +71 -0
  37. data/lib/llm_cost_tracker/billing/line_item.rb +1 -1
  38. data/lib/llm_cost_tracker/budget.rb +4 -2
  39. data/lib/llm_cost_tracker/capture/stream_collector.rb +93 -20
  40. data/lib/llm_cost_tracker/capture/stream_tracker.rb +40 -5
  41. data/lib/llm_cost_tracker/configuration.rb +53 -1
  42. data/lib/llm_cost_tracker/dashboard_setup_state.rb +109 -0
  43. data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +2 -0
  44. data/lib/llm_cost_tracker/doctor/ingestion_check.rb +26 -0
  45. data/lib/llm_cost_tracker/doctor/invoice_reconciliation_check.rb +164 -0
  46. data/lib/llm_cost_tracker/doctor/schema_check.rb +5 -2
  47. data/lib/llm_cost_tracker/doctor.rb +72 -3
  48. data/lib/llm_cost_tracker/engine.rb +9 -0
  49. data/lib/llm_cost_tracker/event.rb +1 -1
  50. data/lib/llm_cost_tracker/generators/llm_cost_tracker/call_rollups_generator.rb +43 -0
  51. data/lib/llm_cost_tracker/generators/llm_cost_tracker/durable_ingestion_generator.rb +43 -0
  52. data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +13 -3
  53. data/lib/llm_cost_tracker/generators/llm_cost_tracker/reconciliation_generator.rb +34 -0
  54. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_call_rollups.rb.erb +15 -0
  55. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +5 -58
  56. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_durable_ingestion.rb.erb +29 -0
  57. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_reconciliation.rb.erb +55 -0
  58. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +28 -25
  59. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_rollups_provider.rb.erb +20 -0
  60. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_tags_key_value_index.rb.erb +32 -0
  61. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_image_tokens.rb.erb +18 -0
  62. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_rollups_provider_generator.rb +38 -0
  63. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_tags_key_value_index_generator.rb +30 -0
  64. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_image_tokens_generator.rb +29 -0
  65. data/lib/llm_cost_tracker/ingestion/inbox.rb +0 -1
  66. data/lib/llm_cost_tracker/ingestion/inline.rb +22 -0
  67. data/lib/llm_cost_tracker/ingestion/worker.rb +10 -2
  68. data/lib/llm_cost_tracker/ingestion.rb +48 -10
  69. data/lib/llm_cost_tracker/integrations/anthropic.rb +24 -5
  70. data/lib/llm_cost_tracker/integrations/base.rb +22 -5
  71. data/lib/llm_cost_tracker/integrations/openai.rb +300 -66
  72. data/lib/llm_cost_tracker/integrations/ruby_llm.rb +105 -6
  73. data/lib/llm_cost_tracker/integrations.rb +19 -1
  74. data/lib/llm_cost_tracker/ledger/period/totals.rb +21 -5
  75. data/lib/llm_cost_tracker/ledger/rollups.rb +24 -10
  76. data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +30 -1
  77. data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +3 -3
  78. data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +17 -2
  79. data/lib/llm_cost_tracker/ledger/schema/calls.rb +2 -0
  80. data/lib/llm_cost_tracker/ledger/schema/ingestion_inbox_entries.rb +47 -0
  81. data/lib/llm_cost_tracker/ledger/schema/ingestion_leases.rb +42 -0
  82. data/lib/llm_cost_tracker/ledger/schema/provider_invoice_imports.rb +46 -0
  83. data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +2 -2
  84. data/lib/llm_cost_tracker/ledger/store.rb +14 -14
  85. data/lib/llm_cost_tracker/ledger/tags/encoding.rb +37 -0
  86. data/lib/llm_cost_tracker/ledger/tags/query.rb +2 -1
  87. data/lib/llm_cost_tracker/ledger.rb +2 -1
  88. data/lib/llm_cost_tracker/masking.rb +39 -0
  89. data/lib/llm_cost_tracker/middleware/faraday.rb +88 -29
  90. data/lib/llm_cost_tracker/parsers/anthropic.rb +22 -7
  91. data/lib/llm_cost_tracker/parsers/base.rb +5 -1
  92. data/lib/llm_cost_tracker/parsers/gemini.rb +4 -0
  93. data/lib/llm_cost_tracker/parsers/openai.rb +16 -2
  94. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +5 -1
  95. data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +49 -10
  96. data/lib/llm_cost_tracker/parsers/openai_usage.rb +124 -53
  97. data/lib/llm_cost_tracker/prices.json +110 -19
  98. data/lib/llm_cost_tracker/pricing/effective_prices.rb +5 -36
  99. data/lib/llm_cost_tracker/pricing/lookup.rb +36 -3
  100. data/lib/llm_cost_tracker/pricing/mode.rb +76 -0
  101. data/lib/llm_cost_tracker/pricing/registry.rb +3 -1
  102. data/lib/llm_cost_tracker/pricing/service_charges.rb +9 -3
  103. data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +50 -1
  104. data/lib/llm_cost_tracker/pricing/sync.rb +3 -1
  105. data/lib/llm_cost_tracker/pricing.rb +47 -19
  106. data/lib/llm_cost_tracker/railtie.rb +6 -0
  107. data/lib/llm_cost_tracker/reconcile_tasks.rb +134 -0
  108. data/lib/llm_cost_tracker/reconciliation/diff.rb +428 -0
  109. data/lib/llm_cost_tracker/reconciliation/diff_result.rb +48 -0
  110. data/lib/llm_cost_tracker/reconciliation/import_result.rb +19 -0
  111. data/lib/llm_cost_tracker/reconciliation/importer.rb +253 -0
  112. data/lib/llm_cost_tracker/reconciliation/sources/anthropic_usage.rb +171 -0
  113. data/lib/llm_cost_tracker/reconciliation/sources/fingerprint.rb +20 -0
  114. data/lib/llm_cost_tracker/reconciliation/sources/openai_usage.rb +142 -0
  115. data/lib/llm_cost_tracker/reconciliation.rb +118 -0
  116. data/lib/llm_cost_tracker/report/data.rb +4 -1
  117. data/lib/llm_cost_tracker/retention.rb +15 -2
  118. data/lib/llm_cost_tracker/tags/context.rb +3 -4
  119. data/lib/llm_cost_tracker/tags/sanitizer.rb +60 -4
  120. data/lib/llm_cost_tracker/token_usage.rb +10 -2
  121. data/lib/llm_cost_tracker/tracker.rb +45 -18
  122. data/lib/llm_cost_tracker/version.rb +1 -1
  123. data/lib/llm_cost_tracker.rb +9 -0
  124. data/lib/tasks/llm_cost_tracker.rake +25 -2
  125. metadata +36 -1
@@ -6,7 +6,7 @@ require_relative "../parsers/openai_service_charges"
6
6
 
7
7
  module LlmCostTracker
8
8
  module Integrations
9
- module Openai
9
+ module Openai # rubocop:disable Metrics/ModuleLength
10
10
  extend Base
11
11
 
12
12
  class << self
@@ -14,8 +14,30 @@ module LlmCostTracker
14
14
  :openai
15
15
  end
16
16
 
17
- def stream_pricing_mode(request)
18
- Pricing.normalize_mode((request || {})[:service_tier])
17
+ def stream_pricing_mode(request, host: nil)
18
+ LlmCostTracker::Parsers::OpenaiUsage.combined_pricing_mode(
19
+ host: host,
20
+ model: (request || {})[:model],
21
+ service_tier: (request || {})[:service_tier]
22
+ )
23
+ end
24
+
25
+ def stream_collector(request, host: nil)
26
+ LlmCostTracker::Capture::StreamCollector.new(
27
+ provider: integration_name.to_s,
28
+ model: request[:model],
29
+ pricing_mode: stream_pricing_mode(request, host: host),
30
+ request: request
31
+ )
32
+ end
33
+
34
+ def client_host_for(resource)
35
+ client = resource.instance_variable_get(:@client)
36
+ return nil unless client.respond_to?(:base_url, true)
37
+
38
+ URI.parse(client.send(:base_url).to_s).host
39
+ rescue URI::InvalidURIError
40
+ nil
19
41
  end
20
42
 
21
43
  def minimum_version
@@ -28,20 +50,40 @@ module LlmCostTracker
28
50
 
29
51
  def patch_targets
30
52
  [
31
- patch_target(
32
- "OpenAI::Resources::Responses",
33
- with: ResponsesPatch,
34
- methods: %i[create stream stream_raw retrieve_streaming]
35
- ),
36
- patch_target(
37
- "OpenAI::Resources::Chat::Completions",
38
- with: ChatCompletionsPatch,
39
- methods: %i[create stream_raw]
40
- )
53
+ patch_target("OpenAI::Resources::Responses",
54
+ with: ResponsesPatch, methods: %i[create stream stream_raw retrieve_streaming]),
55
+ patch_target("OpenAI::Resources::Chat::Completions",
56
+ with: ChatCompletionsPatch, methods: %i[create stream stream_raw]),
57
+ *auxiliary_patch_targets
58
+ ]
59
+ end
60
+
61
+ def auxiliary_patch_targets
62
+ [
63
+ patch_target("OpenAI::Resources::Embeddings",
64
+ with: EmbeddingsPatch, methods: %i[create], optional: true),
65
+ patch_target("OpenAI::Resources::Images",
66
+ with: ImagesPatch, methods: %i[generate edit create_variation], optional: true),
67
+ patch_target("OpenAI::Resources::Images",
68
+ with: StreamingImagesPatch,
69
+ methods: %i[generate_stream_raw edit_stream_raw],
70
+ optional: true, skip_when_methods_missing: true),
71
+ patch_target("OpenAI::Resources::Audio::Transcriptions",
72
+ with: TranscriptionsPatch, methods: %i[create], optional: true),
73
+ patch_target("OpenAI::Resources::Audio::Transcriptions",
74
+ with: StreamingTranscriptionsPatch,
75
+ methods: %i[create_streaming],
76
+ optional: true, skip_when_methods_missing: true),
77
+ patch_target("OpenAI::Resources::Audio::Translations",
78
+ with: TranslationsPatch, methods: %i[create], optional: true),
79
+ patch_target("OpenAI::Resources::Audio::Speech",
80
+ with: SpeechPatch, methods: %i[create], optional: true),
81
+ patch_target("OpenAI::Resources::Moderations",
82
+ with: ModerationsPatch, methods: %i[create], optional: true)
41
83
  ]
42
84
  end
43
85
 
44
- def record_response(response, request:, latency_ms:)
86
+ def record_response(response, request:, latency_ms:, host: nil)
45
87
  return unless active?
46
88
 
47
89
  record_safely do
@@ -53,27 +95,141 @@ module LlmCostTracker
53
95
  next if input_tokens.nil? && output_tokens.nil?
54
96
 
55
97
  cache_read = cache_read_input_tokens(usage)
98
+ model = object_value(response, :model) || request[:model]
56
99
  LlmCostTracker::Tracker.record(
57
100
  capture: UsageCapture.build(
58
101
  provider: "openai",
59
- model: object_value(response, :model) || request[:model],
60
- pricing_mode: object_value(response, :service_tier) || request[:service_tier],
61
- token_usage: token_usage(usage:, input_tokens:, output_tokens:, cache_read:),
102
+ model: model,
103
+ pricing_mode: LlmCostTracker::Parsers::OpenaiUsage.combined_pricing_mode(
104
+ host: host,
105
+ model: model,
106
+ service_tier: object_value(response, :service_tier) || request[:service_tier]
107
+ ),
108
+ token_usage: token_usage(usage:, input_tokens:, output_tokens:, cache_read:, model: model),
62
109
  usage_source: :sdk_response,
63
110
  provider_response_id: object_value(response, :id),
64
- service_line_items: service_line_items_from(response)
111
+ service_line_items: service_line_items_from(response, request: request)
65
112
  ),
66
113
  latency_ms: latency_ms
67
114
  )
68
115
  end
69
116
  end
70
117
 
71
- def service_line_items_from(response)
118
+ def record_image(response, request:, latency_ms:)
119
+ usage = object_value(response, :usage)
120
+ raw_input = usage ? object_value(usage, :input_tokens).to_i : 0
121
+ raw_output = usage ? object_value(usage, :output_tokens).to_i : 0
122
+ image_input = image_input_tokens(usage).to_i
123
+ cache_read = cache_read_input_tokens(usage).to_i
124
+ text_input = [raw_input - image_input - cache_read, 0].max
125
+ image_output, text_output = split_image_output(usage, raw_output)
126
+ record_passthrough(
127
+ model: request[:model],
128
+ response: response,
129
+ latency_ms: latency_ms,
130
+ input_tokens: text_input,
131
+ image_input_tokens: image_input,
132
+ output_tokens: text_output,
133
+ image_output_tokens: image_output,
134
+ cache_read_input_tokens: cache_read
135
+ )
136
+ end
137
+
138
+ def split_image_output(usage, raw_output)
139
+ image_tokens = image_output_tokens(usage).to_i
140
+ text_tokens = text_output_tokens(usage).to_i
141
+ return [raw_output, 0] if image_tokens.zero? && text_tokens.zero?
142
+
143
+ text_tokens = [raw_output - image_tokens, 0].max if text_tokens.zero?
144
+ [image_tokens, text_tokens]
145
+ end
146
+
147
+ def record_transcription(response, request:, latency_ms:)
148
+ record_passthrough(
149
+ model: request[:model],
150
+ response: response,
151
+ latency_ms: latency_ms,
152
+ **transcription_token_attributes(object_value(response, :usage))
153
+ )
154
+ end
155
+
156
+ def transcription_token_attributes(usage)
157
+ return { input_tokens: 0, output_tokens: 0 } unless usage && object_value(usage, :type).to_s == "tokens"
158
+
159
+ raw_input = object_value(usage, :input_tokens).to_i
160
+ audio_input = object_dig(usage, :input_token_details, :audio_tokens).to_i
161
+ {
162
+ input_tokens: [raw_input - audio_input, 0].max,
163
+ audio_input_tokens: audio_input,
164
+ output_tokens: object_value(usage, :output_tokens).to_i
165
+ }
166
+ end
167
+
168
+ def record_speech(_response, request:, latency_ms:)
169
+ record_passthrough(
170
+ model: request[:model],
171
+ response: nil,
172
+ latency_ms: latency_ms,
173
+ input_tokens: 0,
174
+ output_tokens: 0,
175
+ service_line_items: speech_line_items(request)
176
+ )
177
+ end
178
+
179
+ CHARACTER_BILLED_TTS_MODELS = /\Atts-1(-hd)?\z/
180
+ private_constant :CHARACTER_BILLED_TTS_MODELS
181
+
182
+ def speech_line_items(request)
183
+ input = request[:input]
184
+ return [] unless input.is_a?(String)
185
+ return [] unless CHARACTER_BILLED_TTS_MODELS.match?(request[:model].to_s)
186
+
187
+ [LlmCostTracker::Billing::LineItem.build(
188
+ component_key: :text_to_speech_character,
189
+ quantity: input.length,
190
+ cost_status: LlmCostTracker::Billing::CostStatus::UNKNOWN,
191
+ pricing_basis: :provider_usage,
192
+ provider_field: "request.input"
193
+ )]
194
+ end
195
+
196
+ def record_moderation(response, request:, latency_ms:)
197
+ record_passthrough(
198
+ model: object_value(response, :model) || request[:model],
199
+ response: response,
200
+ latency_ms: latency_ms,
201
+ input_tokens: 0,
202
+ output_tokens: 0
203
+ )
204
+ end
205
+
206
+ def record_passthrough(model:, response:, latency_ms:, service_line_items: [], **token_attributes)
207
+ return unless active?
208
+
209
+ record_safely do
210
+ LlmCostTracker::Tracker.record(
211
+ capture: UsageCapture.build(
212
+ provider: "openai",
213
+ model: model,
214
+ token_usage: TokenUsage.build(**token_attributes),
215
+ usage_source: :sdk_response,
216
+ provider_response_id: response && object_value(response, :id),
217
+ service_line_items: service_line_items
218
+ ),
219
+ latency_ms: latency_ms
220
+ )
221
+ end
222
+ end
223
+
224
+ def service_line_items_from(response, request: nil)
72
225
  output = object_value(response, :output)
73
226
  return [] unless output.respond_to?(:each)
74
227
 
75
- LlmCostTracker::Parsers::OpenaiServiceCharges
76
- .line_items_from_output(output.map { |item| normalize_output_item(item) })
228
+ LlmCostTracker::Parsers::OpenaiServiceCharges.line_items_from_output(
229
+ output.map { |item| normalize_output_item(item) },
230
+ request: request,
231
+ model: object_value(response, :model) || request&.dig(:model)
232
+ )
77
233
  end
78
234
 
79
235
  def normalize_output_item(item)
@@ -81,7 +237,7 @@ module LlmCostTracker
81
237
  return nil if item.nil?
82
238
 
83
239
  {
84
- "type" => object_value(item, :type),
240
+ "type" => object_value(item, :type)&.to_s,
85
241
  "id" => object_value(item, :id),
86
242
  "status" => object_value(item, :status),
87
243
  "container_id" => object_value(item, :container_id),
@@ -93,19 +249,34 @@ module LlmCostTracker
93
249
  return nil if action.nil?
94
250
  return action if action.is_a?(Hash)
95
251
 
96
- { "type" => object_value(action, :type) }
252
+ { "type" => object_value(action, :type)&.to_s }
97
253
  end
98
254
 
99
- def token_usage(usage:, input_tokens:, output_tokens:, cache_read:)
255
+ IMAGE_OUTPUT_MODEL_PATTERN = /\Agpt-image-/i
256
+ private_constant :IMAGE_OUTPUT_MODEL_PATTERN
257
+
258
+ def token_usage(usage:, input_tokens:, output_tokens:, cache_read:, model: nil)
100
259
  audio_input = audio_input_tokens(usage)
101
260
  audio_output = audio_output_tokens(usage)
261
+ image_input = image_input_tokens(usage)
262
+ image_output_details = image_output_tokens(usage)
263
+ text_output_details = text_output_tokens(usage)
264
+ image_output, regular_output = split_responses_image_output(
265
+ output_tokens: output_tokens.to_i,
266
+ image_output_details: image_output_details,
267
+ text_output_details: text_output_details,
268
+ audio_output: audio_output,
269
+ default_to_image: model.to_s.match?(IMAGE_OUTPUT_MODEL_PATTERN)
270
+ )
102
271
 
103
272
  TokenUsage.build(
104
- input_tokens: regular_input_tokens(input_tokens, cache_read, audio_input),
105
- output_tokens: regular_output_tokens(output_tokens, audio_output),
273
+ input_tokens: regular_input_tokens(input_tokens, cache_read, audio_input, image_input),
274
+ output_tokens: regular_output,
106
275
  cache_read_input_tokens: cache_read,
107
276
  audio_input_tokens: audio_input,
108
277
  audio_output_tokens: audio_output,
278
+ image_input_tokens: image_input,
279
+ image_output_tokens: image_output,
109
280
  hidden_output_tokens: hidden_output_tokens(usage)
110
281
  )
111
282
  end
@@ -113,44 +284,36 @@ module LlmCostTracker
113
284
  INPUT_DETAIL_KEYS = %i[input_tokens_details input_token_details prompt_tokens_details].freeze
114
285
  OUTPUT_DETAIL_KEYS = %i[output_tokens_details output_token_details completion_tokens_details].freeze
115
286
 
116
- def cache_read_input_tokens(usage)
117
- input_detail(usage, :cached_tokens)
118
- end
119
-
120
- def hidden_output_tokens(usage)
121
- output_detail(usage, :reasoning_tokens)
122
- end
123
-
124
- def audio_input_tokens(usage)
125
- input_detail(usage, :audio_tokens)
126
- end
127
-
128
- def audio_output_tokens(usage)
129
- output_detail(usage, :audio_tokens)
130
- end
287
+ def cache_read_input_tokens(usage) = detail(usage, INPUT_DETAIL_KEYS, :cached_tokens)
288
+ def hidden_output_tokens(usage) = detail(usage, OUTPUT_DETAIL_KEYS, :reasoning_tokens)
289
+ def audio_input_tokens(usage) = detail(usage, INPUT_DETAIL_KEYS, :audio_tokens)
290
+ def audio_output_tokens(usage) = detail(usage, OUTPUT_DETAIL_KEYS, :audio_tokens)
291
+ def image_input_tokens(usage) = detail(usage, INPUT_DETAIL_KEYS, :image_tokens)
292
+ def image_output_tokens(usage) = detail(usage, OUTPUT_DETAIL_KEYS, :image_tokens)
293
+ def text_output_tokens(usage) = detail(usage, OUTPUT_DETAIL_KEYS, :text_tokens)
131
294
 
132
- def input_detail(usage, key)
133
- INPUT_DETAIL_KEYS.each do |container|
295
+ def detail(usage, containers, key)
296
+ containers.each do |container|
134
297
  value = object_dig(usage, container, key)
135
298
  return value.to_i if value
136
299
  end
137
300
  0
138
301
  end
139
302
 
140
- def output_detail(usage, key)
141
- OUTPUT_DETAIL_KEYS.each do |container|
142
- value = object_dig(usage, container, key)
143
- return value.to_i if value
144
- end
145
- 0
303
+ def regular_input_tokens(input_tokens, cache_read, audio_input, image_input)
304
+ [input_tokens.to_i - cache_read - audio_input - image_input, 0].max
146
305
  end
147
306
 
148
- def regular_input_tokens(input_tokens, cache_read, audio_input)
149
- [input_tokens.to_i - cache_read - audio_input, 0].max
150
- end
307
+ def split_responses_image_output(output_tokens:, image_output_details:, text_output_details:, audio_output:,
308
+ default_to_image: false)
309
+ if image_output_details.zero? && text_output_details.zero?
310
+ remainder = [output_tokens - audio_output, 0].max
311
+ return default_to_image ? [remainder, 0] : [0, remainder]
312
+ end
151
313
 
152
- def regular_output_tokens(output_tokens, audio_output)
153
- [output_tokens.to_i - audio_output, 0].max
314
+ text_output = text_output_details
315
+ text_output = [output_tokens - image_output_details - audio_output, 0].max if text_output.zero?
316
+ [image_output_details, text_output]
154
317
  end
155
318
  end
156
319
 
@@ -158,11 +321,12 @@ module LlmCostTracker
158
321
  def create(*args, **kwargs)
159
322
  LlmCostTracker::Integrations::Openai.enforce_budget!
160
323
  started_at = LlmCostTracker::Timing.now_monotonic
161
- response = super
324
+ response = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
162
325
  LlmCostTracker::Integrations::Openai.record_response(
163
326
  response,
164
327
  request: LlmCostTracker::Integrations::Openai.request_params(args, kwargs),
165
- latency_ms: LlmCostTracker::Integrations::Openai.elapsed_ms(started_at)
328
+ latency_ms: LlmCostTracker::Integrations::Openai.elapsed_ms(started_at),
329
+ host: LlmCostTracker::Integrations::Openai.client_host_for(self)
166
330
  )
167
331
  response
168
332
  end
@@ -170,25 +334,28 @@ module LlmCostTracker
170
334
  def stream(*args, **kwargs)
171
335
  request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
172
336
  LlmCostTracker::Integrations::Openai.enforce_budget!
173
- collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
174
- stream = super
337
+ host = LlmCostTracker::Integrations::Openai.client_host_for(self)
338
+ collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
339
+ stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
175
340
  LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
176
341
  end
177
342
 
178
343
  def stream_raw(*args, **kwargs)
179
344
  request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
180
345
  LlmCostTracker::Integrations::Openai.enforce_budget!
181
- collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
182
- stream = super
346
+ host = LlmCostTracker::Integrations::Openai.client_host_for(self)
347
+ collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
348
+ stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
183
349
  LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
184
350
  end
185
351
 
186
352
  def retrieve_streaming(response_id, *args, **kwargs)
187
353
  request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
188
354
  LlmCostTracker::Integrations::Openai.enforce_budget!
189
- collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
355
+ host = LlmCostTracker::Integrations::Openai.client_host_for(self)
356
+ collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
190
357
  collector.provider_response_id = response_id
191
- stream = super
358
+ stream = super(response_id, *LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
192
359
  LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
193
360
  end
194
361
  end
@@ -197,20 +364,87 @@ module LlmCostTracker
197
364
  def create(*args, **kwargs)
198
365
  LlmCostTracker::Integrations::Openai.enforce_budget!
199
366
  started_at = LlmCostTracker::Timing.now_monotonic
200
- response = super
367
+ response = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
201
368
  LlmCostTracker::Integrations::Openai.record_response(
202
369
  response,
203
370
  request: LlmCostTracker::Integrations::Openai.request_params(args, kwargs),
204
- latency_ms: LlmCostTracker::Integrations::Openai.elapsed_ms(started_at)
371
+ latency_ms: LlmCostTracker::Integrations::Openai.elapsed_ms(started_at),
372
+ host: LlmCostTracker::Integrations::Openai.client_host_for(self)
205
373
  )
206
374
  response
207
375
  end
208
376
 
377
+ def stream(*args, **kwargs)
378
+ request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
379
+ LlmCostTracker::Integrations::Openai.enforce_budget!
380
+ host = LlmCostTracker::Integrations::Openai.client_host_for(self)
381
+ collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
382
+ stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
383
+ LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
384
+ end
385
+
209
386
  def stream_raw(*args, **kwargs)
210
387
  request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
211
388
  LlmCostTracker::Integrations::Openai.enforce_budget!
212
- collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
213
- stream = super
389
+ host = LlmCostTracker::Integrations::Openai.client_host_for(self)
390
+ collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
391
+ stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
392
+ LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
393
+ end
394
+ end
395
+
396
+ module PatchBuilder
397
+ module_function
398
+
399
+ def build(record_method:, methods:)
400
+ Module.new.tap do |mod|
401
+ methods.each { |method_name| define_wrapped_method(mod, method_name, record_method) }
402
+ end
403
+ end
404
+
405
+ def define_wrapped_method(mod, method_name, record_method)
406
+ mod.define_method(method_name) do |*args, **kwargs, &block|
407
+ integration = LlmCostTracker::Integrations::Openai
408
+ integration.enforce_budget!
409
+ started_at = LlmCostTracker::Timing.now_monotonic
410
+ response = super(*integration.normalize_sdk_args(args, kwargs), &block)
411
+ integration.public_send(
412
+ record_method, response,
413
+ request: integration.request_params(args, kwargs),
414
+ latency_ms: integration.elapsed_ms(started_at)
415
+ )
416
+ response
417
+ end
418
+ end
419
+ end
420
+
421
+ EmbeddingsPatch = PatchBuilder.build(record_method: :record_response, methods: %i[create])
422
+ ImagesPatch = PatchBuilder.build(record_method: :record_image, methods: %i[generate edit create_variation])
423
+ TranscriptionsPatch = PatchBuilder.build(record_method: :record_transcription, methods: %i[create])
424
+ TranslationsPatch = PatchBuilder.build(record_method: :record_transcription, methods: %i[create])
425
+ SpeechPatch = PatchBuilder.build(record_method: :record_speech, methods: %i[create])
426
+ ModerationsPatch = PatchBuilder.build(record_method: :record_moderation, methods: %i[create])
427
+
428
+ module StreamingImagesPatch
429
+ %i[generate_stream_raw edit_stream_raw].each do |method_name|
430
+ define_method(method_name) do |*args, **kwargs|
431
+ request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
432
+ LlmCostTracker::Integrations::Openai.enforce_budget!
433
+ host = LlmCostTracker::Integrations::Openai.client_host_for(self)
434
+ collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
435
+ stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
436
+ LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
437
+ end
438
+ end
439
+ end
440
+
441
+ module StreamingTranscriptionsPatch
442
+ def create_streaming(*args, **kwargs)
443
+ request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
444
+ LlmCostTracker::Integrations::Openai.enforce_budget!
445
+ host = LlmCostTracker::Integrations::Openai.client_host_for(self)
446
+ collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
447
+ stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
214
448
  LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
215
449
  end
216
450
  end
@@ -25,7 +25,7 @@ module LlmCostTracker
25
25
  patch_target(
26
26
  "RubyLLM::Provider",
27
27
  with: ProviderPatch,
28
- methods: %i[slug complete embed transcribe]
28
+ methods: %i[slug complete embed transcribe paint moderate]
29
29
  )
30
30
  ]
31
31
  end
@@ -65,6 +65,69 @@ module LlmCostTracker
65
65
  )
66
66
  end
67
67
 
68
+ def record_image(provider, response, request:, latency_ms:)
69
+ usage = object_value(response, :usage)
70
+ usage = {} unless usage.is_a?(Hash)
71
+ raw_input = (usage[:input_tokens] || usage["input_tokens"]).to_i
72
+ raw_output = (usage[:output_tokens] || usage["output_tokens"]).to_i
73
+ image_input = image_token_detail(usage, :input)
74
+ image_output = image_token_detail(usage, :output)
75
+ text_input = [raw_input - image_input, 0].max
76
+ text_output = [raw_output - image_output, 0].max
77
+ record_passthrough(
78
+ provider: provider_slug(provider),
79
+ model: response_model_id(response) || model_id(request[:model]),
80
+ response: response,
81
+ latency_ms: latency_ms,
82
+ input_tokens: text_input,
83
+ image_input_tokens: image_input,
84
+ output_tokens: text_output,
85
+ image_output_tokens: image_output
86
+ )
87
+ end
88
+
89
+ def record_moderation(provider, response, request:, latency_ms:)
90
+ record_passthrough(
91
+ provider: provider_slug(provider),
92
+ model: response_model_id(response) || model_id(request[:model]),
93
+ response: response,
94
+ latency_ms: latency_ms,
95
+ input_tokens: 0,
96
+ output_tokens: 0
97
+ )
98
+ end
99
+
100
+ def image_token_detail(usage, direction)
101
+ container_key = direction == :input ? :input_tokens_details : :output_tokens_details
102
+ details = usage[container_key] || usage[container_key.to_s] || {}
103
+ return 0 unless details.is_a?(Hash)
104
+
105
+ (details[:image_tokens] || details["image_tokens"]).to_i
106
+ end
107
+
108
+ def record_passthrough(provider:, model:, response:, latency_ms:, input_tokens:, output_tokens:,
109
+ image_input_tokens: 0, image_output_tokens: 0)
110
+ return unless active?
111
+
112
+ record_safely do
113
+ LlmCostTracker::Tracker.record(
114
+ capture: UsageCapture.build(
115
+ provider: provider,
116
+ model: model,
117
+ token_usage: TokenUsage.build(
118
+ input_tokens: input_tokens,
119
+ output_tokens: output_tokens,
120
+ image_input_tokens: image_input_tokens,
121
+ image_output_tokens: image_output_tokens
122
+ ),
123
+ usage_source: :sdk_response,
124
+ provider_response_id: provider_response_id(response)
125
+ ),
126
+ latency_ms: latency_ms
127
+ )
128
+ end
129
+ end
130
+
68
131
  def record_usage(provider:, model:, response:, latency_ms:, stream:, output_tokens: nil)
69
132
  return unless active?
70
133
 
@@ -80,7 +143,7 @@ module LlmCostTracker
80
143
  capture: UsageCapture.build(
81
144
  provider: provider,
82
145
  model: model,
83
- pricing_mode: pricing_mode(response),
146
+ pricing_mode: pricing_mode(provider: provider, response: response),
84
147
  token_usage: TokenUsage.build(
85
148
  input_tokens: regular_input_tokens(input_tokens, cache_read),
86
149
  output_tokens: output_tokens.to_i,
@@ -122,10 +185,16 @@ module LlmCostTracker
122
185
  object_value(response, :id, :provider_response_id) || object_dig(response, :raw, :id)
123
186
  end
124
187
 
125
- def pricing_mode(response)
126
- object_value(response, :pricing_mode, :service_tier) ||
127
- object_dig(response, :raw, :pricing_mode) ||
128
- object_dig(response, :raw, :service_tier)
188
+ ANTHROPIC_STANDARD_EQUIVALENT_SERVICE_TIERS = %w[standard standard_only priority].freeze
189
+ private_constant :ANTHROPIC_STANDARD_EQUIVALENT_SERVICE_TIERS
190
+
191
# Resolves the pricing mode reported by a response.
#
# Lookup order: pricing_mode / service_tier on the response itself, then
# raw.pricing_mode, then raw.service_tier. For the "anthropic" provider, a
# value listed in ANTHROPIC_STANDARD_EQUIVALENT_SERVICE_TIERS is reported
# as nil instead of being passed through.
def pricing_mode(provider:, response:)
  tier = object_value(response, :pricing_mode, :service_tier)
  tier ||= object_dig(response, :raw, :pricing_mode)
  tier ||= object_dig(response, :raw, :service_tier)

  standard_equivalent = provider == "anthropic" &&
                        ANTHROPIC_STANDARD_EQUIVALENT_SERVICE_TIERS.include?(tier.to_s)
  standard_equivalent ? nil : tier
end
130
199
  end
131
200
 
@@ -175,6 +244,36 @@ module LlmCostTracker
175
244
  )
176
245
  response
177
246
  end
247
+
248
# Wraps the original paint call: captures the request params, enforces the
# budget, times the call, then records the image usage.
#
# The original method's return value is handed back unchanged.
def paint(*args, **kwargs)
  tracker = LlmCostTracker::Integrations::RubyLlm
  params = tracker.request_params(args, kwargs)
  tracker.enforce_budget!
  clock_start = LlmCostTracker::Timing.now_monotonic

  # Bare `super` forwards the original arguments; tap returns its result.
  super.tap do |result|
    tracker.record_image(
      self,
      result,
      request: params,
      latency_ms: tracker.elapsed_ms(clock_start)
    )
  end
end
262
+
263
# Wraps the original moderate call: captures the request params, enforces
# the budget, times the call, then records the moderation event.
#
# The original method's return value is handed back unchanged.
def moderate(*args, **kwargs)
  tracker = LlmCostTracker::Integrations::RubyLlm
  params = tracker.request_params(args, kwargs)
  tracker.enforce_budget!
  clock_start = LlmCostTracker::Timing.now_monotonic

  # Bare `super` forwards the original arguments; tap returns its result.
  super.tap do |result|
    tracker.record_moderation(
      self,
      result,
      request: params,
      latency_ms: tracker.elapsed_ms(clock_start)
    )
  end
end
178
277
  end
179
278
  end
180
279
  end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "errors"
4
+ require_relative "logging"
4
5
  require_relative "integrations/openai"
5
6
  require_relative "integrations/anthropic"
6
7
  require_relative "integrations/ruby_llm"
@@ -13,10 +14,14 @@ module LlmCostTracker
13
14
  ruby_llm: RubyLlm
14
15
  }.freeze
15
16
 
17
+ DOUBLE_INSTRUMENTATION_OVERLAPS = %i[openai anthropic].freeze
18
+
16
19
  module_function
17
20
 
18
21
# Installs each requested integration.
#
# The names are normalized once, checked for the double-instrumentation
# overlap (which only logs a warning), and then installed in order.
# Returns the normalized list of names.
def install!(names = LlmCostTracker.configuration.instrumented_integrations)
  normalize(names)
    .tap { |selected| warn_double_instrumentation(selected) }
    .each { |integration_name| fetch(integration_name).install }
end
21
26
 
22
27
  def checks(names = LlmCostTracker.configuration.instrumented_integrations)
@@ -29,6 +34,19 @@ module LlmCostTracker
29
34
  Array(names).flatten.uniq
30
35
  end
31
36
 
37
# Logs a warning when :ruby_llm is enabled alongside any integration listed
# in DOUBLE_INSTRUMENTATION_OVERLAPS. Does nothing otherwise.
def warn_double_instrumentation(names)
  overlapping = names.include?(:ruby_llm) ? names & DOUBLE_INSTRUMENTATION_OVERLAPS : []
  return if overlapping.empty?

  listed = overlapping.map(&:inspect).join(", ")
  Logging.warn(
    ":ruby_llm is enabled together with #{listed}. " \
    "RubyLLM uses HTTP underneath, so calls routed to those providers may be recorded twice " \
    "(once via the SDK patch, once via the Faraday parser). Pick one path per provider."
  )
end
49
+
32
50
  def fetch(name)
33
51
  AVAILABLE.fetch(name) do
34
52
  message = "Unknown integration: #{name.inspect}. Use one of: #{names.join(', ')}"