llm_cost_tracker 0.7.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. checksums.yaml +4 -4
  2. data/.ruby-version +1 -0
  3. data/CHANGELOG.md +173 -0
  4. data/README.md +60 -220
  5. data/app/assets/llm_cost_tracker/application.css +282 -45
  6. data/app/controllers/llm_cost_tracker/application_controller.rb +25 -20
  7. data/app/controllers/llm_cost_tracker/assets_controller.rb +11 -1
  8. data/app/controllers/llm_cost_tracker/calls_controller.rb +22 -19
  9. data/app/controllers/llm_cost_tracker/data_quality_controller.rb +14 -2
  10. data/app/controllers/llm_cost_tracker/reconciliation_controller.rb +106 -0
  11. data/app/controllers/llm_cost_tracker/tags_controller.rb +15 -1
  12. data/app/helpers/llm_cost_tracker/application_helper.rb +18 -21
  13. data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +3 -21
  14. data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +4 -4
  15. data/app/helpers/llm_cost_tracker/dashboard_query_helper.rb +1 -1
  16. data/app/helpers/llm_cost_tracker/inline_style_helper.rb +28 -0
  17. data/app/helpers/llm_cost_tracker/reconciliation_helper.rb +13 -0
  18. data/app/helpers/llm_cost_tracker/token_usage_helper.rb +24 -7
  19. data/app/models/llm_cost_tracker/call.rb +166 -0
  20. data/app/models/llm_cost_tracker/call_line_item.rb +18 -0
  21. data/app/models/llm_cost_tracker/call_rollup.rb +6 -0
  22. data/app/models/llm_cost_tracker/call_tag.rb +12 -0
  23. data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +9 -0
  24. data/app/models/llm_cost_tracker/ingestion/lease.rb +0 -3
  25. data/app/models/llm_cost_tracker/provider_invoice.rb +13 -0
  26. data/app/models/llm_cost_tracker/provider_invoice_import.rb +24 -0
  27. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +152 -32
  28. data/app/services/llm_cost_tracker/dashboard/date_range.rb +1 -1
  29. data/app/services/llm_cost_tracker/dashboard/filter.rb +8 -6
  30. data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +74 -21
  31. data/app/services/llm_cost_tracker/dashboard/pagination.rb +6 -4
  32. data/app/services/llm_cost_tracker/dashboard/params.rb +8 -2
  33. data/app/services/llm_cost_tracker/dashboard/provider_breakdown.rb +1 -1
  34. data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +4 -3
  35. data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +42 -9
  36. data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +14 -37
  37. data/app/services/llm_cost_tracker/dashboard/time_series.rb +1 -1
  38. data/app/services/llm_cost_tracker/dashboard/top_models.rb +1 -1
  39. data/app/views/layouts/llm_cost_tracker/application.html.erb +6 -1
  40. data/app/views/llm_cost_tracker/calls/index.html.erb +33 -75
  41. data/app/views/llm_cost_tracker/calls/show.html.erb +73 -33
  42. data/app/views/llm_cost_tracker/dashboard/index.html.erb +16 -57
  43. data/app/views/llm_cost_tracker/data_quality/index.html.erb +183 -167
  44. data/app/views/llm_cost_tracker/errors/database.html.erb +1 -1
  45. data/app/views/llm_cost_tracker/models/index.html.erb +18 -50
  46. data/app/views/llm_cost_tracker/reconciliation/index.html.erb +183 -0
  47. data/app/views/llm_cost_tracker/shared/_bar.html.erb +1 -1
  48. data/app/views/llm_cost_tracker/shared/_filters.html.erb +66 -0
  49. data/app/views/llm_cost_tracker/shared/_metric_stack.html.erb +1 -1
  50. data/app/views/llm_cost_tracker/shared/_sort.html.erb +13 -0
  51. data/app/views/llm_cost_tracker/shared/setup_required.html.erb +1 -1
  52. data/app/views/llm_cost_tracker/tags/index.html.erb +3 -34
  53. data/app/views/llm_cost_tracker/tags/show.html.erb +64 -36
  54. data/config/routes.rb +3 -2
  55. data/lib/llm_cost_tracker/billing/components.rb +95 -0
  56. data/lib/llm_cost_tracker/billing/components.yml +188 -0
  57. data/lib/llm_cost_tracker/billing/cost_status.rb +45 -0
  58. data/lib/llm_cost_tracker/billing/line_item.rb +189 -0
  59. data/lib/llm_cost_tracker/budget.rb +26 -36
  60. data/lib/llm_cost_tracker/capture/stream_collector.rb +125 -38
  61. data/lib/llm_cost_tracker/capture/stream_tracker.rb +40 -5
  62. data/lib/llm_cost_tracker/configuration.rb +86 -17
  63. data/lib/llm_cost_tracker/dashboard_setup_state.rb +109 -0
  64. data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +56 -0
  65. data/lib/llm_cost_tracker/doctor/ingestion_check.rb +48 -30
  66. data/lib/llm_cost_tracker/doctor/invoice_reconciliation_check.rb +164 -0
  67. data/lib/llm_cost_tracker/doctor/legacy_audit_check.rb +36 -0
  68. data/lib/llm_cost_tracker/doctor/legacy_billing_status_check.rb +22 -0
  69. data/lib/llm_cost_tracker/doctor/price_check.rb +2 -2
  70. data/lib/llm_cost_tracker/doctor/pricing_snapshot_drift_check.rb +85 -0
  71. data/lib/llm_cost_tracker/doctor/probe.rb +17 -0
  72. data/lib/llm_cost_tracker/doctor/schema_check.rb +34 -0
  73. data/lib/llm_cost_tracker/doctor.rb +111 -44
  74. data/lib/llm_cost_tracker/engine.rb +9 -0
  75. data/lib/llm_cost_tracker/errors.rb +5 -19
  76. data/lib/llm_cost_tracker/event.rb +11 -3
  77. data/lib/llm_cost_tracker/generators/llm_cost_tracker/call_rollups_generator.rb +43 -0
  78. data/lib/llm_cost_tracker/generators/llm_cost_tracker/durable_ingestion_generator.rb +43 -0
  79. data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +17 -5
  80. data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +2 -6
  81. data/lib/llm_cost_tracker/generators/llm_cost_tracker/reconciliation_generator.rb +34 -0
  82. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_call_rollups.rb.erb +15 -0
  83. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +104 -0
  84. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_durable_ingestion.rb.erb +29 -0
  85. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_reconciliation.rb.erb +55 -0
  86. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +28 -25
  87. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_rollups_provider.rb.erb +20 -0
  88. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_tags_key_value_index.rb.erb +32 -0
  89. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_image_tokens.rb.erb +18 -0
  90. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_rollups_provider_generator.rb +38 -0
  91. data/lib/llm_cost_tracker/generators/llm_cost_tracker/{add_provider_response_id_generator.rb → upgrade_call_tags_key_value_index_generator.rb} +5 -4
  92. data/lib/llm_cost_tracker/generators/llm_cost_tracker/{add_streaming_generator.rb → upgrade_image_tokens_generator.rb} +4 -4
  93. data/lib/llm_cost_tracker/ingestion/batch.rb +11 -12
  94. data/lib/llm_cost_tracker/ingestion/inbox.rb +39 -24
  95. data/lib/llm_cost_tracker/ingestion/inline.rb +22 -0
  96. data/lib/llm_cost_tracker/ingestion/worker.rb +24 -7
  97. data/lib/llm_cost_tracker/ingestion.rb +66 -22
  98. data/lib/llm_cost_tracker/integrations/anthropic.rb +68 -42
  99. data/lib/llm_cost_tracker/integrations/base.rb +56 -32
  100. data/lib/llm_cost_tracker/integrations/openai.rb +342 -63
  101. data/lib/llm_cost_tracker/integrations/ruby_llm.rb +110 -11
  102. data/lib/llm_cost_tracker/integrations.rb +21 -3
  103. data/lib/llm_cost_tracker/ledger/period/totals.rb +30 -11
  104. data/lib/llm_cost_tracker/ledger/period.rb +5 -5
  105. data/lib/llm_cost_tracker/ledger/rollups/upsert_sql.rb +2 -2
  106. data/lib/llm_cost_tracker/ledger/rollups.rb +90 -25
  107. data/lib/llm_cost_tracker/ledger/schema/adapter.rb +18 -0
  108. data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +79 -0
  109. data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +37 -0
  110. data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +41 -0
  111. data/lib/llm_cost_tracker/ledger/schema/calls.rb +36 -23
  112. data/lib/llm_cost_tracker/ledger/schema/ingestion_inbox_entries.rb +47 -0
  113. data/lib/llm_cost_tracker/ledger/schema/ingestion_leases.rb +42 -0
  114. data/lib/llm_cost_tracker/ledger/schema/provider_invoice_imports.rb +46 -0
  115. data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +57 -0
  116. data/lib/llm_cost_tracker/ledger/store.rb +103 -20
  117. data/lib/llm_cost_tracker/ledger/tags/encoding.rb +37 -0
  118. data/lib/llm_cost_tracker/ledger/tags/query.rb +6 -11
  119. data/lib/llm_cost_tracker/ledger/tags/sql.rb +27 -15
  120. data/lib/llm_cost_tracker/ledger.rb +5 -2
  121. data/lib/llm_cost_tracker/logging.rb +2 -5
  122. data/lib/llm_cost_tracker/masking.rb +39 -0
  123. data/lib/llm_cost_tracker/middleware/faraday.rb +95 -35
  124. data/lib/llm_cost_tracker/parsers/anthropic.rb +74 -14
  125. data/lib/llm_cost_tracker/parsers/base.rb +13 -4
  126. data/lib/llm_cost_tracker/parsers/gemini.rb +105 -15
  127. data/lib/llm_cost_tracker/parsers/openai.rb +16 -2
  128. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +15 -3
  129. data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +126 -0
  130. data/lib/llm_cost_tracker/parsers/openai_usage.rb +157 -59
  131. data/lib/llm_cost_tracker/parsers/sse.rb +1 -1
  132. data/lib/llm_cost_tracker/parsers.rb +1 -1
  133. data/lib/llm_cost_tracker/prices.json +198 -22
  134. data/lib/llm_cost_tracker/pricing/effective_prices.rb +28 -21
  135. data/lib/llm_cost_tracker/pricing/explainer.rb +4 -5
  136. data/lib/llm_cost_tracker/pricing/lookup.rb +73 -36
  137. data/lib/llm_cost_tracker/pricing/mode.rb +76 -0
  138. data/lib/llm_cost_tracker/pricing/registry.rb +67 -45
  139. data/lib/llm_cost_tracker/pricing/service_charges.rb +210 -0
  140. data/lib/llm_cost_tracker/pricing/sync/fetcher.rb +26 -17
  141. data/lib/llm_cost_tracker/pricing/sync/registry_diff.rb +6 -15
  142. data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +50 -1
  143. data/lib/llm_cost_tracker/pricing/sync.rb +59 -10
  144. data/lib/llm_cost_tracker/pricing/sync_change_printer.rb +32 -0
  145. data/lib/llm_cost_tracker/pricing.rb +220 -28
  146. data/lib/llm_cost_tracker/railtie.rb +6 -8
  147. data/lib/llm_cost_tracker/reconcile_tasks.rb +134 -0
  148. data/lib/llm_cost_tracker/reconciliation/diff.rb +428 -0
  149. data/lib/llm_cost_tracker/reconciliation/diff_result.rb +48 -0
  150. data/lib/llm_cost_tracker/reconciliation/import_result.rb +19 -0
  151. data/lib/llm_cost_tracker/reconciliation/importer.rb +253 -0
  152. data/lib/llm_cost_tracker/reconciliation/sources/anthropic_usage.rb +171 -0
  153. data/lib/llm_cost_tracker/reconciliation/sources/fingerprint.rb +20 -0
  154. data/lib/llm_cost_tracker/reconciliation/sources/openai_usage.rb +142 -0
  155. data/lib/llm_cost_tracker/reconciliation.rb +118 -0
  156. data/lib/llm_cost_tracker/report/data.rb +19 -8
  157. data/lib/llm_cost_tracker/report.rb +0 -4
  158. data/lib/llm_cost_tracker/retention.rb +22 -9
  159. data/lib/llm_cost_tracker/tags/context.rb +2 -5
  160. data/lib/llm_cost_tracker/tags/key.rb +4 -0
  161. data/lib/llm_cost_tracker/tags/sanitizer.rb +71 -20
  162. data/lib/llm_cost_tracker/timing.rb +15 -0
  163. data/lib/llm_cost_tracker/token_usage.rb +64 -42
  164. data/lib/llm_cost_tracker/tracker.rb +97 -27
  165. data/lib/llm_cost_tracker/usage_capture.rb +29 -8
  166. data/lib/llm_cost_tracker/version.rb +1 -1
  167. data/lib/llm_cost_tracker.rb +45 -35
  168. data/lib/tasks/llm_cost_tracker.rake +45 -17
  169. metadata +71 -41
  170. data/app/models/llm_cost_tracker/ingestion/event.rb +0 -13
  171. data/app/models/llm_cost_tracker/ledger/call.rb +0 -45
  172. data/app/models/llm_cost_tracker/ledger/call_metrics.rb +0 -66
  173. data/app/models/llm_cost_tracker/ledger/period/grouping.rb +0 -71
  174. data/app/models/llm_cost_tracker/ledger/period/total.rb +0 -13
  175. data/app/models/llm_cost_tracker/ledger/tags/accessors.rb +0 -19
  176. data/lib/llm_cost_tracker/configuration/instrumentation.rb +0 -33
  177. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_ingestion_generator.rb +0 -29
  178. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +0 -29
  179. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_period_totals_generator.rb +0 -29
  180. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_token_usage_generator.rb +0 -42
  181. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_ingestion_to_llm_cost_tracker.rb.erb +0 -33
  182. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +0 -9
  183. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +0 -104
  184. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_provider_response_id_to_llm_api_calls.rb.erb +0 -15
  185. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_streaming_to_llm_api_calls.rb.erb +0 -21
  186. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_token_usage_to_llm_api_calls.rb.erb +0 -22
  187. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +0 -83
  188. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +0 -26
  189. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +0 -44
  190. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +0 -29
  191. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +0 -29
  192. data/lib/llm_cost_tracker/ledger/rollups/batch.rb +0 -43
  193. data/lib/llm_cost_tracker/ledger/schema/period_totals.rb +0 -32
  194. data/lib/llm_cost_tracker/pricing/components.rb +0 -37
  195. data/lib/llm_cost_tracker/pricing/sync/registry_loader.rb +0 -63
@@ -15,10 +15,18 @@ module LlmCostTracker
15
15
  end
16
16
 
17
17
  def provider_names
18
- [
18
+ providers = LlmCostTracker.configuration.openai_compatible_providers
19
+ cached = @provider_names
20
+ return cached if cached && @provider_names_providers.equal?(providers)
21
+
22
+ names = [
19
23
  "openai_compatible",
20
- *LlmCostTracker.configuration.openai_compatible_providers.each_value.map(&:to_s)
24
+ *providers.each_value.map { |provider| provider.to_s.downcase }
21
25
  ].uniq.freeze
26
+ return names unless providers.frozen?
27
+
28
+ @provider_names_providers = providers
29
+ @provider_names = names
22
30
  end
23
31
 
24
32
  def parse(request_url:, request_body:, response_status:, response_body:, **)
@@ -39,13 +47,17 @@ module LlmCostTracker
39
47
  )
40
48
  end
41
49
 
42
- private
50
+ def auto_enable_stream_usage?(request_url)
51
+ openai_chat_completions_url?(request_url)
52
+ end
43
53
 
44
54
  def provider_for(request_url)
45
55
  uri = parsed_uri(request_url)
46
56
  provider_for_uri(uri) || "openai_compatible"
47
57
  end
48
58
 
59
+ private
60
+
49
61
  def provider_for_uri(uri)
50
62
  return nil unless uri
51
63
 
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../billing/line_item"
4
+
5
+ module LlmCostTracker
6
+ module Parsers
7
+ module OpenaiServiceCharges
8
+ RESPONSE_OUTPUT_COMPONENTS = {
9
+ "web_search_call" => :web_search_request,
10
+ "file_search_call" => :file_search_call,
11
+ "code_interpreter_call" => :container_session,
12
+ "mcp_call" => :mcp_call
13
+ }.freeze
14
+
15
+ REASONING_MODEL_PATTERNS = [
16
+ /\Agpt-5(\b|[\d.-])/i,
17
+ /\Ao\d+(\b|[\d.-])/i
18
+ ].freeze
19
+ NON_REASONING_GPT5_PATTERN = /\Agpt-5(?:\.\d+)?-chat\b/i
20
+ private_constant :NON_REASONING_GPT5_PATTERN
21
+
22
+ module_function
23
+
24
+ def line_items_from_output(output_items, request: nil, model: nil)
25
+ deduped = {}
26
+ Array(output_items).each { |item| store_output_item(deduped, item) }
27
+ deduped.values
28
+ .select { |item| billable?(item) }
29
+ .filter_map { |item| build_line_item(item, request: request, model: model) }
30
+ end
31
+
32
+ def billable?(item)
33
+ return false unless item.is_a?(Hash)
34
+
35
+ component = RESPONSE_OUTPUT_COMPONENTS[item["type"]]
36
+ return false unless component
37
+ return true unless component == :web_search_request
38
+
39
+ action_type = item.dig("action", "type")
40
+ action_type.nil? || action_type == "search"
41
+ end
42
+
43
+ def store_output_item(output_items, item)
44
+ return unless item.is_a?(Hash) && RESPONSE_OUTPUT_COMPONENTS.key?(item["type"])
45
+
46
+ component = RESPONSE_OUTPUT_COMPONENTS[item["type"]]
47
+ key = if component == :container_session && item["container_id"]
48
+ "#{component}:#{item['container_id']}"
49
+ else
50
+ item["id"] || "#{item['type']}:#{output_items.length}"
51
+ end
52
+ output_items[key] = item
53
+ end
54
+
55
+ def build_line_item(item, request: nil, model: nil)
56
+ return nil unless item.is_a?(Hash)
57
+
58
+ component_key = component_key_for(item, request: request, model: model)
59
+ return nil unless component_key
60
+
61
+ provider_item_id = if component_key == :container_session
62
+ item["container_id"] || item["id"]
63
+ else
64
+ item["id"]
65
+ end
66
+ Billing::LineItem.build(
67
+ component_key: component_key,
68
+ quantity: 1,
69
+ cost_status: Billing::CostStatus::UNKNOWN,
70
+ pricing_basis: :provider_usage,
71
+ provider_field: "response.output.#{item['type']}",
72
+ provider_item_id: provider_item_id,
73
+ details: line_item_details(item)
74
+ )
75
+ end
76
+
77
+ def component_key_for(item, request:, model:)
78
+ component = RESPONSE_OUTPUT_COMPONENTS[item["type"]]
79
+ return component unless component == :web_search_request
80
+ return component unless web_search_preview_used?(request)
81
+
82
+ reasoning_model?(model) ? :web_search_preview_request_reasoning : :web_search_preview_request_non_reasoning
83
+ end
84
+
85
+ def web_search_preview_used?(request)
86
+ tools = request && (request[:tools] || request["tools"])
87
+ return false unless tools.respond_to?(:each)
88
+
89
+ tools.any? do |tool|
90
+ type = tool.is_a?(Hash) ? (tool[:type] || tool["type"]) : tool
91
+ type.to_s.include?("web_search_preview")
92
+ end
93
+ end
94
+
95
+ def reasoning_model?(model)
96
+ return false unless model
97
+
98
+ name = model.to_s.split("/", 2).last
99
+ return false if NON_REASONING_GPT5_PATTERN.match?(name)
100
+
101
+ REASONING_MODEL_PATTERNS.any? { |pattern| pattern.match?(name) }
102
+ end
103
+
104
+ def line_item_details(item)
105
+ {
106
+ "status" => item["status"],
107
+ "action_type" => item.dig("action", "type"),
108
+ "container_id" => item["container_id"]
109
+ }.compact
110
+ end
111
+
112
+ def openai_service_line_items(response, request: nil)
113
+ line_items_from_output(response["output"], request: request, model: response["model"])
114
+ end
115
+
116
+ def openai_stream_service_line_items(events, request: nil, model: nil)
117
+ output_items = []
118
+ each_event_data(events) do |data|
119
+ output_items.concat(Array(data.dig("response", "output")))
120
+ output_items << data["item"] if data["item"]
121
+ end
122
+ line_items_from_output(output_items, request: request, model: model)
123
+ end
124
+ end
125
+ end
126
+ end
@@ -1,8 +1,33 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "openai_service_charges"
4
+
3
5
  module LlmCostTracker
4
6
  module Parsers
5
7
  module OpenaiUsage
8
+ include OpenaiServiceCharges
9
+
10
+ OPENAI_DATA_RESIDENCY_HOST_PATTERN = /\A[a-z]{2,3}\.api\.openai\.com\z/
11
+
12
+ class << self
13
+ def combined_pricing_mode(host:, model:, service_tier:)
14
+ modes = [Pricing.normalize_mode(service_tier)]
15
+ modes << "data_residency" if regional_processing?(host: host, model: model)
16
+ modes = modes.compact.uniq
17
+ modes.empty? ? nil : modes.join("_")
18
+ end
19
+
20
+ def regional_processing?(host:, model:)
21
+ host.to_s.downcase.match?(OPENAI_DATA_RESIDENCY_HOST_PATTERN) && data_residency_model?(model)
22
+ end
23
+
24
+ def data_residency_model?(model)
25
+ model.to_s.match?(
26
+ /\Agpt-5\.(?:4|5)(?:-(?:mini|nano|pro|codex(?:-mini|-max)?))?(?:-\d{4}-\d{2}-\d{2})?\z/
27
+ )
28
+ end
29
+ end
30
+
6
31
  private
7
32
 
8
33
  def parse_openai_usage(request_url:, request_body:, response_status:, response_body:)
@@ -26,8 +51,9 @@ module LlmCostTracker
26
51
  service_tier: response["service_tier"] || request["service_tier"]
27
52
  ),
28
53
  model: model,
29
- token_usage: token_usage(usage: usage, cache_read: cache_read),
30
- usage_source: :response
54
+ token_usage: token_usage(usage: usage, cache_read: cache_read, model: model),
55
+ usage_source: :response,
56
+ service_line_items: openai_service_line_items(response, request: request)
31
57
  )
32
58
  end
33
59
 
@@ -35,99 +61,171 @@ module LlmCostTracker
35
61
  return nil unless response_status == 200
36
62
 
37
63
  request = safe_json_parse(request_body)
38
- model =
39
- find_event_value(events) { |data| data["model"] || data.dig("response", "model") } || request["model"]
40
64
  usage = detect_stream_usage(events)
41
- response_id = find_event_value(events) { |data| data["id"] || data.dig("response", "id") }
42
- pricing_mode = pricing_mode(
43
- request_url: request_url,
65
+ context = stream_capture_context(events: events, request: request, request_url: request_url)
66
+
67
+ return build_known_stream_usage(usage: usage, **context) if usage
68
+
69
+ warn_missing_stream_usage(request_url: request_url, request: request)
70
+ build_unknown_stream_usage(**context)
71
+ end
72
+
73
+ def stream_capture_context(events:, request:, request_url:)
74
+ model = find_event_value(events) do |data|
75
+ data["model"] || data.dig("response", "model") || data.dig("chunk", "model")
76
+ end || request["model"]
77
+ {
78
+ provider: provider_for(request_url),
79
+ model: model,
80
+ provider_response_id: find_event_value(events) do |data|
81
+ data["id"] || data.dig("response", "id") || data.dig("chunk", "id")
82
+ end,
83
+ pricing_mode: pricing_mode(
84
+ request_url: request_url,
85
+ model: model,
86
+ service_tier: stream_pricing_mode(events) || request["service_tier"]
87
+ ),
88
+ service_line_items: openai_stream_service_line_items(events, request: request, model: model)
89
+ }
90
+ end
91
+
92
+ def build_known_stream_usage(usage:, provider:, model:, provider_response_id:, pricing_mode:, service_line_items:)
93
+ cache_read = cache_read_input_tokens(usage)
94
+ UsageCapture.build(
95
+ provider: provider,
96
+ provider_response_id: provider_response_id,
97
+ pricing_mode: pricing_mode,
44
98
  model: model,
45
- service_tier: stream_pricing_mode(events) || request["service_tier"]
99
+ token_usage: token_usage(usage: usage, cache_read: cache_read, model: model),
100
+ stream: true,
101
+ usage_source: :stream_final,
102
+ service_line_items: service_line_items
46
103
  )
104
+ end
47
105
 
48
- if usage
49
- cache_read = cache_read_input_tokens(usage)
50
- UsageCapture.build(
51
- provider: provider_for(request_url),
52
- provider_response_id: response_id,
53
- pricing_mode: pricing_mode,
54
- model: model,
55
- token_usage: token_usage(usage: usage, cache_read: cache_read),
56
- stream: true,
57
- usage_source: :stream_final
58
- )
59
- else
60
- build_unknown_stream_usage(
61
- provider: provider_for(request_url),
62
- model: model,
63
- provider_response_id: response_id,
64
- pricing_mode: pricing_mode
65
- )
66
- end
106
+ def warn_missing_stream_usage(request_url:, request:)
107
+ return unless request.is_a?(Hash) && request["stream"]
108
+ return unless openai_chat_completions_url?(request_url)
109
+ return if request.dig("stream_options", "include_usage")
110
+
111
+ Logging.warn(
112
+ "OpenAI-compatible chat-completions stream finished without a final usage chunk. " \
113
+ "Set `stream_options: { include_usage: true }` in your request body so the gem can " \
114
+ "record token counts. This call was stored with usage_source=unknown."
115
+ )
116
+ end
117
+
118
+ def openai_chat_completions_url?(request_url)
119
+ uri = parsed_uri(request_url)
120
+ uri && uri.path.to_s.end_with?("/chat/completions")
67
121
  end
68
122
 
69
123
  def detect_stream_usage(events)
70
124
  find_event_value(events, reverse: true) do |data|
71
- usage = data["usage"] || data.dig("response", "usage")
125
+ usage = data["usage"] || data.dig("response", "usage") || data.dig("chunk", "usage")
72
126
  usage if usage.is_a?(Hash)
73
127
  end
74
128
  end
75
129
 
76
130
  def stream_pricing_mode(events)
77
131
  find_event_value(events, reverse: true) do |data|
78
- data["service_tier"] || data.dig("response", "service_tier")
132
+ data["service_tier"] || data.dig("response", "service_tier") || data.dig("chunk", "service_tier")
79
133
  end
80
134
  end
81
135
 
82
136
  def pricing_mode(request_url:, model:, service_tier:)
83
- modes = [Pricing.normalize_mode(service_tier)]
84
- modes << "data_residency" if openai_regional_processing?(request_url: request_url, model: model)
85
- modes = modes.compact.uniq
86
- modes.empty? ? nil : modes.join("_")
137
+ OpenaiUsage.combined_pricing_mode(host: parsed_uri(request_url)&.host, model: model, service_tier: service_tier)
87
138
  end
88
139
 
89
- def openai_regional_processing?(request_url:, model:)
90
- uri = parsed_uri(request_url)
91
- return false unless %w[us.api.openai.com eu.api.openai.com].include?(uri&.host.to_s.downcase)
92
-
93
- openai_data_residency_model?(model)
94
- end
95
-
96
- def openai_data_residency_model?(model)
97
- model.to_s.match?(/\Agpt-5\.(?:4|5)(?:-(?:mini|nano|pro))?(?:-\d{4}-\d{2}-\d{2})?\z/)
98
- end
140
+ IMAGE_OUTPUT_MODEL_PATTERN = /\Agpt-image-/i
141
+ private_constant :IMAGE_OUTPUT_MODEL_PATTERN
142
+
143
+ def token_usage(usage:, cache_read:, model: nil)
144
+ audio_input = audio_input_tokens(usage)
145
+ audio_output = audio_output_tokens(usage)
146
+ image_input = image_input_tokens(usage)
147
+ image_output_details = image_output_tokens(usage)
148
+ text_output_details = text_output_tokens(usage)
149
+ raw_output = (usage["completion_tokens"] || usage["output_tokens"]).to_i
150
+ image_output, regular_output_remainder = split_stream_image_output(
151
+ raw_output: raw_output, image_output_details: image_output_details,
152
+ text_output_details: text_output_details, audio_output: audio_output,
153
+ default_to_image: model.to_s.match?(IMAGE_OUTPUT_MODEL_PATTERN)
154
+ )
99
155
 
100
- def token_usage(usage:, cache_read:)
101
156
  TokenUsage.build(
102
- input_tokens: regular_input_tokens(usage: usage, cache_read: cache_read),
103
- output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
104
- total_tokens: total_tokens(usage: usage, cache_read: cache_read),
157
+ input_tokens: regular_input_tokens(
158
+ usage: usage, cache_read: cache_read, audio_input: audio_input, image_input: image_input
159
+ ),
160
+ output_tokens: regular_output_remainder,
161
+ total_tokens: usage["total_tokens"],
105
162
  cache_read_input_tokens: cache_read,
163
+ audio_input_tokens: audio_input,
164
+ audio_output_tokens: audio_output,
165
+ image_input_tokens: image_input,
166
+ image_output_tokens: image_output,
106
167
  hidden_output_tokens: hidden_output_tokens(usage)
107
168
  )
108
169
  end
109
170
 
110
- def regular_input_tokens(usage:, cache_read:)
111
- [(usage["prompt_tokens"] || usage["input_tokens"]).to_i - cache_read.to_i, 0].max
171
+ def split_stream_image_output(raw_output:, image_output_details:, text_output_details:, audio_output:,
172
+ default_to_image: false)
173
+ if image_output_details.zero? && text_output_details.zero?
174
+ remainder = [raw_output - audio_output, 0].max
175
+ return default_to_image ? [remainder, 0] : [0, remainder]
176
+ end
177
+
178
+ text_output = text_output_details
179
+ text_output = [raw_output - image_output_details - audio_output, 0].max if text_output.zero?
180
+ [image_output_details, text_output]
181
+ end
182
+
183
+ def regular_input_tokens(usage:, cache_read:, audio_input:, image_input:)
184
+ raw = (usage["prompt_tokens"] || usage["input_tokens"]).to_i
185
+ [raw - cache_read - audio_input - image_input, 0].max
112
186
  end
113
187
 
114
188
  def cache_read_input_tokens(usage)
115
- details = usage["prompt_tokens_details"] || usage["input_tokens_details"] || {}
116
- details["cached_tokens"]
189
+ details = input_token_details(usage)
190
+ details["cached_tokens"].to_i
191
+ end
192
+
193
+ def audio_input_tokens(usage)
194
+ details = input_token_details(usage)
195
+ details["audio_tokens"].to_i
117
196
  end
118
197
 
119
198
  def hidden_output_tokens(usage)
120
- details = usage["completion_tokens_details"] || usage["output_tokens_details"] || {}
121
- details["reasoning_tokens"]
199
+ details = output_token_details(usage)
200
+ details["reasoning_tokens"].to_i
201
+ end
202
+
203
+ def audio_output_tokens(usage)
204
+ details = output_token_details(usage)
205
+ details["audio_tokens"].to_i
122
206
  end
123
207
 
124
- def total_tokens(usage:, cache_read:)
125
- total = usage["total_tokens"]
126
- return total.to_i unless total.nil?
208
+ def image_input_tokens(usage)
209
+ details = input_token_details(usage)
210
+ details["image_tokens"].to_i
211
+ end
212
+
213
+ def image_output_tokens(usage)
214
+ details = output_token_details(usage)
215
+ details["image_tokens"].to_i
216
+ end
217
+
218
+ def text_output_tokens(usage)
219
+ details = output_token_details(usage)
220
+ details["text_tokens"].to_i
221
+ end
222
+
223
+ def input_token_details(usage)
224
+ usage["prompt_tokens_details"] || usage["input_tokens_details"] || usage["input_token_details"] || {}
225
+ end
127
226
 
128
- regular_input_tokens(usage: usage, cache_read: cache_read) +
129
- cache_read.to_i +
130
- (usage["completion_tokens"] || usage["output_tokens"]).to_i
227
+ def output_token_details(usage)
228
+ usage["completion_tokens_details"] || usage["output_tokens_details"] || usage["output_token_details"] || {}
131
229
  end
132
230
  end
133
231
  end
@@ -12,7 +12,7 @@ module LlmCostTracker
12
12
  def parse(body)
13
13
  return [] if body.blank?
14
14
 
15
- return parse_json_array(body) if body.lstrip.start_with?("[")
15
+ return parse_json_array(body) if body.match?(/\A\s*\[/)
16
16
 
17
17
  parse_event_stream(body)
18
18
  end
@@ -13,7 +13,7 @@ module LlmCostTracker
13
13
  def find_for_provider(provider)
14
14
  provider_name = provider.to_s.downcase
15
15
  BUILT_INS.find do |parser|
16
- Array(parser.provider_names).map { |name| name.to_s.downcase }.include?(provider_name)
16
+ parser.provider_names.include?(provider_name)
17
17
  end
18
18
  end
19
19
  end