llm_cost_tracker 0.7.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. checksums.yaml +4 -4
  2. data/.ruby-version +1 -0
  3. data/CHANGELOG.md +173 -0
  4. data/README.md +60 -220
  5. data/app/assets/llm_cost_tracker/application.css +282 -45
  6. data/app/controllers/llm_cost_tracker/application_controller.rb +25 -20
  7. data/app/controllers/llm_cost_tracker/assets_controller.rb +11 -1
  8. data/app/controllers/llm_cost_tracker/calls_controller.rb +22 -19
  9. data/app/controllers/llm_cost_tracker/data_quality_controller.rb +14 -2
  10. data/app/controllers/llm_cost_tracker/reconciliation_controller.rb +106 -0
  11. data/app/controllers/llm_cost_tracker/tags_controller.rb +15 -1
  12. data/app/helpers/llm_cost_tracker/application_helper.rb +18 -21
  13. data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +3 -21
  14. data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +4 -4
  15. data/app/helpers/llm_cost_tracker/dashboard_query_helper.rb +1 -1
  16. data/app/helpers/llm_cost_tracker/inline_style_helper.rb +28 -0
  17. data/app/helpers/llm_cost_tracker/reconciliation_helper.rb +13 -0
  18. data/app/helpers/llm_cost_tracker/token_usage_helper.rb +24 -7
  19. data/app/models/llm_cost_tracker/call.rb +166 -0
  20. data/app/models/llm_cost_tracker/call_line_item.rb +18 -0
  21. data/app/models/llm_cost_tracker/call_rollup.rb +6 -0
  22. data/app/models/llm_cost_tracker/call_tag.rb +12 -0
  23. data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +9 -0
  24. data/app/models/llm_cost_tracker/ingestion/lease.rb +0 -3
  25. data/app/models/llm_cost_tracker/provider_invoice.rb +13 -0
  26. data/app/models/llm_cost_tracker/provider_invoice_import.rb +24 -0
  27. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +152 -32
  28. data/app/services/llm_cost_tracker/dashboard/date_range.rb +1 -1
  29. data/app/services/llm_cost_tracker/dashboard/filter.rb +8 -6
  30. data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +74 -21
  31. data/app/services/llm_cost_tracker/dashboard/pagination.rb +6 -4
  32. data/app/services/llm_cost_tracker/dashboard/params.rb +8 -2
  33. data/app/services/llm_cost_tracker/dashboard/provider_breakdown.rb +1 -1
  34. data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +4 -3
  35. data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +42 -9
  36. data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +14 -37
  37. data/app/services/llm_cost_tracker/dashboard/time_series.rb +1 -1
  38. data/app/services/llm_cost_tracker/dashboard/top_models.rb +1 -1
  39. data/app/views/layouts/llm_cost_tracker/application.html.erb +6 -1
  40. data/app/views/llm_cost_tracker/calls/index.html.erb +33 -75
  41. data/app/views/llm_cost_tracker/calls/show.html.erb +73 -33
  42. data/app/views/llm_cost_tracker/dashboard/index.html.erb +16 -57
  43. data/app/views/llm_cost_tracker/data_quality/index.html.erb +183 -167
  44. data/app/views/llm_cost_tracker/errors/database.html.erb +1 -1
  45. data/app/views/llm_cost_tracker/models/index.html.erb +18 -50
  46. data/app/views/llm_cost_tracker/reconciliation/index.html.erb +183 -0
  47. data/app/views/llm_cost_tracker/shared/_bar.html.erb +1 -1
  48. data/app/views/llm_cost_tracker/shared/_filters.html.erb +66 -0
  49. data/app/views/llm_cost_tracker/shared/_metric_stack.html.erb +1 -1
  50. data/app/views/llm_cost_tracker/shared/_sort.html.erb +13 -0
  51. data/app/views/llm_cost_tracker/shared/setup_required.html.erb +1 -1
  52. data/app/views/llm_cost_tracker/tags/index.html.erb +3 -34
  53. data/app/views/llm_cost_tracker/tags/show.html.erb +64 -36
  54. data/config/routes.rb +3 -2
  55. data/lib/llm_cost_tracker/billing/components.rb +95 -0
  56. data/lib/llm_cost_tracker/billing/components.yml +188 -0
  57. data/lib/llm_cost_tracker/billing/cost_status.rb +45 -0
  58. data/lib/llm_cost_tracker/billing/line_item.rb +189 -0
  59. data/lib/llm_cost_tracker/budget.rb +26 -36
  60. data/lib/llm_cost_tracker/capture/stream_collector.rb +125 -38
  61. data/lib/llm_cost_tracker/capture/stream_tracker.rb +40 -5
  62. data/lib/llm_cost_tracker/configuration.rb +86 -17
  63. data/lib/llm_cost_tracker/dashboard_setup_state.rb +109 -0
  64. data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +56 -0
  65. data/lib/llm_cost_tracker/doctor/ingestion_check.rb +48 -30
  66. data/lib/llm_cost_tracker/doctor/invoice_reconciliation_check.rb +164 -0
  67. data/lib/llm_cost_tracker/doctor/legacy_audit_check.rb +36 -0
  68. data/lib/llm_cost_tracker/doctor/legacy_billing_status_check.rb +22 -0
  69. data/lib/llm_cost_tracker/doctor/price_check.rb +2 -2
  70. data/lib/llm_cost_tracker/doctor/pricing_snapshot_drift_check.rb +85 -0
  71. data/lib/llm_cost_tracker/doctor/probe.rb +17 -0
  72. data/lib/llm_cost_tracker/doctor/schema_check.rb +34 -0
  73. data/lib/llm_cost_tracker/doctor.rb +111 -44
  74. data/lib/llm_cost_tracker/engine.rb +9 -0
  75. data/lib/llm_cost_tracker/errors.rb +5 -19
  76. data/lib/llm_cost_tracker/event.rb +11 -3
  77. data/lib/llm_cost_tracker/generators/llm_cost_tracker/call_rollups_generator.rb +43 -0
  78. data/lib/llm_cost_tracker/generators/llm_cost_tracker/durable_ingestion_generator.rb +43 -0
  79. data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +17 -5
  80. data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +2 -6
  81. data/lib/llm_cost_tracker/generators/llm_cost_tracker/reconciliation_generator.rb +34 -0
  82. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_call_rollups.rb.erb +15 -0
  83. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +104 -0
  84. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_durable_ingestion.rb.erb +29 -0
  85. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_reconciliation.rb.erb +55 -0
  86. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +28 -25
  87. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_rollups_provider.rb.erb +20 -0
  88. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_tags_key_value_index.rb.erb +32 -0
  89. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_image_tokens.rb.erb +18 -0
  90. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_rollups_provider_generator.rb +38 -0
  91. data/lib/llm_cost_tracker/generators/llm_cost_tracker/{add_provider_response_id_generator.rb → upgrade_call_tags_key_value_index_generator.rb} +5 -4
  92. data/lib/llm_cost_tracker/generators/llm_cost_tracker/{add_streaming_generator.rb → upgrade_image_tokens_generator.rb} +4 -4
  93. data/lib/llm_cost_tracker/ingestion/batch.rb +11 -12
  94. data/lib/llm_cost_tracker/ingestion/inbox.rb +39 -24
  95. data/lib/llm_cost_tracker/ingestion/inline.rb +22 -0
  96. data/lib/llm_cost_tracker/ingestion/worker.rb +24 -7
  97. data/lib/llm_cost_tracker/ingestion.rb +66 -22
  98. data/lib/llm_cost_tracker/integrations/anthropic.rb +68 -42
  99. data/lib/llm_cost_tracker/integrations/base.rb +56 -32
  100. data/lib/llm_cost_tracker/integrations/openai.rb +342 -63
  101. data/lib/llm_cost_tracker/integrations/ruby_llm.rb +110 -11
  102. data/lib/llm_cost_tracker/integrations.rb +21 -3
  103. data/lib/llm_cost_tracker/ledger/period/totals.rb +30 -11
  104. data/lib/llm_cost_tracker/ledger/period.rb +5 -5
  105. data/lib/llm_cost_tracker/ledger/rollups/upsert_sql.rb +2 -2
  106. data/lib/llm_cost_tracker/ledger/rollups.rb +90 -25
  107. data/lib/llm_cost_tracker/ledger/schema/adapter.rb +18 -0
  108. data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +79 -0
  109. data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +37 -0
  110. data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +41 -0
  111. data/lib/llm_cost_tracker/ledger/schema/calls.rb +36 -23
  112. data/lib/llm_cost_tracker/ledger/schema/ingestion_inbox_entries.rb +47 -0
  113. data/lib/llm_cost_tracker/ledger/schema/ingestion_leases.rb +42 -0
  114. data/lib/llm_cost_tracker/ledger/schema/provider_invoice_imports.rb +46 -0
  115. data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +57 -0
  116. data/lib/llm_cost_tracker/ledger/store.rb +103 -20
  117. data/lib/llm_cost_tracker/ledger/tags/encoding.rb +37 -0
  118. data/lib/llm_cost_tracker/ledger/tags/query.rb +6 -11
  119. data/lib/llm_cost_tracker/ledger/tags/sql.rb +27 -15
  120. data/lib/llm_cost_tracker/ledger.rb +5 -2
  121. data/lib/llm_cost_tracker/logging.rb +2 -5
  122. data/lib/llm_cost_tracker/masking.rb +39 -0
  123. data/lib/llm_cost_tracker/middleware/faraday.rb +95 -35
  124. data/lib/llm_cost_tracker/parsers/anthropic.rb +74 -14
  125. data/lib/llm_cost_tracker/parsers/base.rb +13 -4
  126. data/lib/llm_cost_tracker/parsers/gemini.rb +105 -15
  127. data/lib/llm_cost_tracker/parsers/openai.rb +16 -2
  128. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +15 -3
  129. data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +126 -0
  130. data/lib/llm_cost_tracker/parsers/openai_usage.rb +157 -59
  131. data/lib/llm_cost_tracker/parsers/sse.rb +1 -1
  132. data/lib/llm_cost_tracker/parsers.rb +1 -1
  133. data/lib/llm_cost_tracker/prices.json +198 -22
  134. data/lib/llm_cost_tracker/pricing/effective_prices.rb +28 -21
  135. data/lib/llm_cost_tracker/pricing/explainer.rb +4 -5
  136. data/lib/llm_cost_tracker/pricing/lookup.rb +73 -36
  137. data/lib/llm_cost_tracker/pricing/mode.rb +76 -0
  138. data/lib/llm_cost_tracker/pricing/registry.rb +67 -45
  139. data/lib/llm_cost_tracker/pricing/service_charges.rb +210 -0
  140. data/lib/llm_cost_tracker/pricing/sync/fetcher.rb +26 -17
  141. data/lib/llm_cost_tracker/pricing/sync/registry_diff.rb +6 -15
  142. data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +50 -1
  143. data/lib/llm_cost_tracker/pricing/sync.rb +59 -10
  144. data/lib/llm_cost_tracker/pricing/sync_change_printer.rb +32 -0
  145. data/lib/llm_cost_tracker/pricing.rb +220 -28
  146. data/lib/llm_cost_tracker/railtie.rb +6 -8
  147. data/lib/llm_cost_tracker/reconcile_tasks.rb +134 -0
  148. data/lib/llm_cost_tracker/reconciliation/diff.rb +428 -0
  149. data/lib/llm_cost_tracker/reconciliation/diff_result.rb +48 -0
  150. data/lib/llm_cost_tracker/reconciliation/import_result.rb +19 -0
  151. data/lib/llm_cost_tracker/reconciliation/importer.rb +253 -0
  152. data/lib/llm_cost_tracker/reconciliation/sources/anthropic_usage.rb +171 -0
  153. data/lib/llm_cost_tracker/reconciliation/sources/fingerprint.rb +20 -0
  154. data/lib/llm_cost_tracker/reconciliation/sources/openai_usage.rb +142 -0
  155. data/lib/llm_cost_tracker/reconciliation.rb +118 -0
  156. data/lib/llm_cost_tracker/report/data.rb +19 -8
  157. data/lib/llm_cost_tracker/report.rb +0 -4
  158. data/lib/llm_cost_tracker/retention.rb +22 -9
  159. data/lib/llm_cost_tracker/tags/context.rb +2 -5
  160. data/lib/llm_cost_tracker/tags/key.rb +4 -0
  161. data/lib/llm_cost_tracker/tags/sanitizer.rb +71 -20
  162. data/lib/llm_cost_tracker/timing.rb +15 -0
  163. data/lib/llm_cost_tracker/token_usage.rb +64 -42
  164. data/lib/llm_cost_tracker/tracker.rb +97 -27
  165. data/lib/llm_cost_tracker/usage_capture.rb +29 -8
  166. data/lib/llm_cost_tracker/version.rb +1 -1
  167. data/lib/llm_cost_tracker.rb +45 -35
  168. data/lib/tasks/llm_cost_tracker.rake +45 -17
  169. metadata +71 -41
  170. data/app/models/llm_cost_tracker/ingestion/event.rb +0 -13
  171. data/app/models/llm_cost_tracker/ledger/call.rb +0 -45
  172. data/app/models/llm_cost_tracker/ledger/call_metrics.rb +0 -66
  173. data/app/models/llm_cost_tracker/ledger/period/grouping.rb +0 -71
  174. data/app/models/llm_cost_tracker/ledger/period/total.rb +0 -13
  175. data/app/models/llm_cost_tracker/ledger/tags/accessors.rb +0 -19
  176. data/lib/llm_cost_tracker/configuration/instrumentation.rb +0 -33
  177. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_ingestion_generator.rb +0 -29
  178. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +0 -29
  179. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_period_totals_generator.rb +0 -29
  180. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_token_usage_generator.rb +0 -42
  181. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_ingestion_to_llm_cost_tracker.rb.erb +0 -33
  182. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +0 -9
  183. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +0 -104
  184. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_provider_response_id_to_llm_api_calls.rb.erb +0 -15
  185. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_streaming_to_llm_api_calls.rb.erb +0 -21
  186. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_token_usage_to_llm_api_calls.rb.erb +0 -22
  187. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +0 -83
  188. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +0 -26
  189. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +0 -44
  190. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +0 -29
  191. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +0 -29
  192. data/lib/llm_cost_tracker/ledger/rollups/batch.rb +0 -43
  193. data/lib/llm_cost_tracker/ledger/schema/period_totals.rb +0 -32
  194. data/lib/llm_cost_tracker/pricing/components.rb +0 -37
  195. data/lib/llm_cost_tracker/pricing/sync/registry_loader.rb +0 -63
@@ -1,12 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "base"
4
- require_relative "../capture/stream_collector"
5
- require_relative "../capture/stream_tracker"
4
+ require_relative "../billing/line_item"
5
+ require_relative "../parsers/openai_service_charges"
6
6
 
7
7
  module LlmCostTracker
8
8
  module Integrations
9
- module Openai
9
+ module Openai # rubocop:disable Metrics/ModuleLength
10
10
  extend Base
11
11
 
12
12
  class << self
@@ -14,6 +14,32 @@ module LlmCostTracker
14
14
  :openai
15
15
  end
16
16
 
17
+ def stream_pricing_mode(request, host: nil)
18
+ LlmCostTracker::Parsers::OpenaiUsage.combined_pricing_mode(
19
+ host: host,
20
+ model: (request || {})[:model],
21
+ service_tier: (request || {})[:service_tier]
22
+ )
23
+ end
24
+
25
+ def stream_collector(request, host: nil)
26
+ LlmCostTracker::Capture::StreamCollector.new(
27
+ provider: integration_name.to_s,
28
+ model: request[:model],
29
+ pricing_mode: stream_pricing_mode(request, host: host),
30
+ request: request
31
+ )
32
+ end
33
+
34
+ def client_host_for(resource)
35
+ client = resource.instance_variable_get(:@client)
36
+ return nil unless client.respond_to?(:base_url, true)
37
+
38
+ URI.parse(client.send(:base_url).to_s).host
39
+ rescue URI::InvalidURIError
40
+ nil
41
+ end
42
+
17
43
  def minimum_version
18
44
  "0.59.0"
19
45
  end
@@ -24,20 +50,40 @@ module LlmCostTracker
24
50
 
25
51
  def patch_targets
26
52
  [
27
- patch_target(
28
- "OpenAI::Resources::Responses",
29
- with: ResponsesPatch,
30
- methods: %i[create stream stream_raw retrieve_streaming]
31
- ),
32
- patch_target(
33
- "OpenAI::Resources::Chat::Completions",
34
- with: ChatCompletionsPatch,
35
- methods: %i[create stream_raw]
36
- )
53
+ patch_target("OpenAI::Resources::Responses",
54
+ with: ResponsesPatch, methods: %i[create stream stream_raw retrieve_streaming]),
55
+ patch_target("OpenAI::Resources::Chat::Completions",
56
+ with: ChatCompletionsPatch, methods: %i[create stream stream_raw]),
57
+ *auxiliary_patch_targets
37
58
  ]
38
59
  end
39
60
 
40
- def record_response(response, request:, latency_ms:)
61
+ def auxiliary_patch_targets
62
+ [
63
+ patch_target("OpenAI::Resources::Embeddings",
64
+ with: EmbeddingsPatch, methods: %i[create], optional: true),
65
+ patch_target("OpenAI::Resources::Images",
66
+ with: ImagesPatch, methods: %i[generate edit create_variation], optional: true),
67
+ patch_target("OpenAI::Resources::Images",
68
+ with: StreamingImagesPatch,
69
+ methods: %i[generate_stream_raw edit_stream_raw],
70
+ optional: true, skip_when_methods_missing: true),
71
+ patch_target("OpenAI::Resources::Audio::Transcriptions",
72
+ with: TranscriptionsPatch, methods: %i[create], optional: true),
73
+ patch_target("OpenAI::Resources::Audio::Transcriptions",
74
+ with: StreamingTranscriptionsPatch,
75
+ methods: %i[create_streaming],
76
+ optional: true, skip_when_methods_missing: true),
77
+ patch_target("OpenAI::Resources::Audio::Translations",
78
+ with: TranslationsPatch, methods: %i[create], optional: true),
79
+ patch_target("OpenAI::Resources::Audio::Speech",
80
+ with: SpeechPatch, methods: %i[create], optional: true),
81
+ patch_target("OpenAI::Resources::Moderations",
82
+ with: ModerationsPatch, methods: %i[create], optional: true)
83
+ ]
84
+ end
85
+
86
+ def record_response(response, request:, latency_ms:, host: nil)
41
87
  return unless active?
42
88
 
43
89
  record_safely do
@@ -49,123 +95,356 @@ module LlmCostTracker
49
95
  next if input_tokens.nil? && output_tokens.nil?
50
96
 
51
97
  cache_read = cache_read_input_tokens(usage)
98
+ model = object_value(response, :model) || request[:model]
52
99
  LlmCostTracker::Tracker.record(
53
100
  capture: UsageCapture.build(
54
101
  provider: "openai",
55
- model: object_value(response, :model) || request[:model],
56
- pricing_mode: object_value(response, :service_tier) || request[:service_tier],
57
- token_usage: TokenUsage.build(
58
- input_tokens: regular_input_tokens(input_tokens, cache_read),
59
- output_tokens: output_tokens.to_i,
60
- cache_read_input_tokens: cache_read,
61
- hidden_output_tokens: hidden_output_tokens(usage)
102
+ model: model,
103
+ pricing_mode: LlmCostTracker::Parsers::OpenaiUsage.combined_pricing_mode(
104
+ host: host,
105
+ model: model,
106
+ service_tier: object_value(response, :service_tier) || request[:service_tier]
62
107
  ),
108
+ token_usage: token_usage(usage:, input_tokens:, output_tokens:, cache_read:, model: model),
63
109
  usage_source: :sdk_response,
64
- provider_response_id: object_value(response, :id)
110
+ provider_response_id: object_value(response, :id),
111
+ service_line_items: service_line_items_from(response, request: request)
65
112
  ),
66
113
  latency_ms: latency_ms
67
114
  )
68
115
  end
69
116
  end
70
117
 
71
- def cache_read_input_tokens(usage)
72
- (
73
- object_dig(usage, :input_tokens_details, :cached_tokens) ||
74
- object_dig(usage, :prompt_tokens_details, :cached_tokens)
75
- ).to_i
118
+ def record_image(response, request:, latency_ms:)
119
+ usage = object_value(response, :usage)
120
+ raw_input = usage ? object_value(usage, :input_tokens).to_i : 0
121
+ raw_output = usage ? object_value(usage, :output_tokens).to_i : 0
122
+ image_input = image_input_tokens(usage).to_i
123
+ cache_read = cache_read_input_tokens(usage).to_i
124
+ text_input = [raw_input - image_input - cache_read, 0].max
125
+ image_output, text_output = split_image_output(usage, raw_output)
126
+ record_passthrough(
127
+ model: request[:model],
128
+ response: response,
129
+ latency_ms: latency_ms,
130
+ input_tokens: text_input,
131
+ image_input_tokens: image_input,
132
+ output_tokens: text_output,
133
+ image_output_tokens: image_output,
134
+ cache_read_input_tokens: cache_read
135
+ )
76
136
  end
77
137
 
78
- def hidden_output_tokens(usage)
79
- (
80
- object_dig(usage, :output_tokens_details, :reasoning_tokens) ||
81
- object_dig(usage, :completion_tokens_details, :reasoning_tokens)
82
- ).to_i
138
+ def split_image_output(usage, raw_output)
139
+ image_tokens = image_output_tokens(usage).to_i
140
+ text_tokens = text_output_tokens(usage).to_i
141
+ return [raw_output, 0] if image_tokens.zero? && text_tokens.zero?
142
+
143
+ text_tokens = [raw_output - image_tokens, 0].max if text_tokens.zero?
144
+ [image_tokens, text_tokens]
83
145
  end
84
146
 
85
- def regular_input_tokens(input_tokens, cache_read)
86
- [input_tokens.to_i - cache_read.to_i, 0].max
147
+ def record_transcription(response, request:, latency_ms:)
148
+ record_passthrough(
149
+ model: request[:model],
150
+ response: response,
151
+ latency_ms: latency_ms,
152
+ **transcription_token_attributes(object_value(response, :usage))
153
+ )
87
154
  end
88
155
 
89
- def track_stream(stream, collector:)
90
- return stream unless active?
156
+ def transcription_token_attributes(usage)
157
+ return { input_tokens: 0, output_tokens: 0 } unless usage && object_value(usage, :type).to_s == "tokens"
91
158
 
92
- LlmCostTracker::Capture::StreamTracker.new(
93
- stream: stream,
94
- collector: collector,
95
- active: -> { active? },
96
- finish: ->(errored:) { finish_stream(collector, errored: errored) }
97
- ).wrap
159
+ raw_input = object_value(usage, :input_tokens).to_i
160
+ audio_input = object_dig(usage, :input_token_details, :audio_tokens).to_i
161
+ {
162
+ input_tokens: [raw_input - audio_input, 0].max,
163
+ audio_input_tokens: audio_input,
164
+ output_tokens: object_value(usage, :output_tokens).to_i
165
+ }
98
166
  end
99
167
 
100
- def stream_collector(request)
101
- LlmCostTracker::Capture::StreamCollector.new(
102
- provider: "openai",
103
- model: request[:model]
168
+ def record_speech(_response, request:, latency_ms:)
169
+ record_passthrough(
170
+ model: request[:model],
171
+ response: nil,
172
+ latency_ms: latency_ms,
173
+ input_tokens: 0,
174
+ output_tokens: 0,
175
+ service_line_items: speech_line_items(request)
176
+ )
177
+ end
178
+
179
+ CHARACTER_BILLED_TTS_MODELS = /\Atts-1(-hd)?\z/
180
+ private_constant :CHARACTER_BILLED_TTS_MODELS
181
+
182
+ def speech_line_items(request)
183
+ input = request[:input]
184
+ return [] unless input.is_a?(String)
185
+ return [] unless CHARACTER_BILLED_TTS_MODELS.match?(request[:model].to_s)
186
+
187
+ [LlmCostTracker::Billing::LineItem.build(
188
+ component_key: :text_to_speech_character,
189
+ quantity: input.length,
190
+ cost_status: LlmCostTracker::Billing::CostStatus::UNKNOWN,
191
+ pricing_basis: :provider_usage,
192
+ provider_field: "request.input"
193
+ )]
194
+ end
195
+
196
+ def record_moderation(response, request:, latency_ms:)
197
+ record_passthrough(
198
+ model: object_value(response, :model) || request[:model],
199
+ response: response,
200
+ latency_ms: latency_ms,
201
+ input_tokens: 0,
202
+ output_tokens: 0
203
+ )
204
+ end
205
+
206
+ def record_passthrough(model:, response:, latency_ms:, service_line_items: [], **token_attributes)
207
+ return unless active?
208
+
209
+ record_safely do
210
+ LlmCostTracker::Tracker.record(
211
+ capture: UsageCapture.build(
212
+ provider: "openai",
213
+ model: model,
214
+ token_usage: TokenUsage.build(**token_attributes),
215
+ usage_source: :sdk_response,
216
+ provider_response_id: response && object_value(response, :id),
217
+ service_line_items: service_line_items
218
+ ),
219
+ latency_ms: latency_ms
220
+ )
221
+ end
222
+ end
223
+
224
+ def service_line_items_from(response, request: nil)
225
+ output = object_value(response, :output)
226
+ return [] unless output.respond_to?(:each)
227
+
228
+ LlmCostTracker::Parsers::OpenaiServiceCharges.line_items_from_output(
229
+ output.map { |item| normalize_output_item(item) },
230
+ request: request,
231
+ model: object_value(response, :model) || request&.dig(:model)
232
+ )
233
+ end
234
+
235
+ def normalize_output_item(item)
236
+ return item if item.is_a?(Hash)
237
+ return nil if item.nil?
238
+
239
+ {
240
+ "type" => object_value(item, :type)&.to_s,
241
+ "id" => object_value(item, :id),
242
+ "status" => object_value(item, :status),
243
+ "container_id" => object_value(item, :container_id),
244
+ "action" => normalize_output_action(object_value(item, :action))
245
+ }
246
+ end
247
+
248
+ def normalize_output_action(action)
249
+ return nil if action.nil?
250
+ return action if action.is_a?(Hash)
251
+
252
+ { "type" => object_value(action, :type)&.to_s }
253
+ end
254
+
255
+ IMAGE_OUTPUT_MODEL_PATTERN = /\Agpt-image-/i
256
+ private_constant :IMAGE_OUTPUT_MODEL_PATTERN
257
+
258
+ def token_usage(usage:, input_tokens:, output_tokens:, cache_read:, model: nil)
259
+ audio_input = audio_input_tokens(usage)
260
+ audio_output = audio_output_tokens(usage)
261
+ image_input = image_input_tokens(usage)
262
+ image_output_details = image_output_tokens(usage)
263
+ text_output_details = text_output_tokens(usage)
264
+ image_output, regular_output = split_responses_image_output(
265
+ output_tokens: output_tokens.to_i,
266
+ image_output_details: image_output_details,
267
+ text_output_details: text_output_details,
268
+ audio_output: audio_output,
269
+ default_to_image: model.to_s.match?(IMAGE_OUTPUT_MODEL_PATTERN)
270
+ )
271
+
272
+ TokenUsage.build(
273
+ input_tokens: regular_input_tokens(input_tokens, cache_read, audio_input, image_input),
274
+ output_tokens: regular_output,
275
+ cache_read_input_tokens: cache_read,
276
+ audio_input_tokens: audio_input,
277
+ audio_output_tokens: audio_output,
278
+ image_input_tokens: image_input,
279
+ image_output_tokens: image_output,
280
+ hidden_output_tokens: hidden_output_tokens(usage)
104
281
  )
105
282
  end
106
283
 
107
- def finish_stream(collector, errored:)
108
- record_safely { collector.finish!(errored: errored) }
284
+ INPUT_DETAIL_KEYS = %i[input_tokens_details input_token_details prompt_tokens_details].freeze
285
+ OUTPUT_DETAIL_KEYS = %i[output_tokens_details output_token_details completion_tokens_details].freeze
286
+
287
+ def cache_read_input_tokens(usage) = detail(usage, INPUT_DETAIL_KEYS, :cached_tokens)
288
+ def hidden_output_tokens(usage) = detail(usage, OUTPUT_DETAIL_KEYS, :reasoning_tokens)
289
+ def audio_input_tokens(usage) = detail(usage, INPUT_DETAIL_KEYS, :audio_tokens)
290
+ def audio_output_tokens(usage) = detail(usage, OUTPUT_DETAIL_KEYS, :audio_tokens)
291
+ def image_input_tokens(usage) = detail(usage, INPUT_DETAIL_KEYS, :image_tokens)
292
+ def image_output_tokens(usage) = detail(usage, OUTPUT_DETAIL_KEYS, :image_tokens)
293
+ def text_output_tokens(usage) = detail(usage, OUTPUT_DETAIL_KEYS, :text_tokens)
294
+
295
+ def detail(usage, containers, key)
296
+ containers.each do |container|
297
+ value = object_dig(usage, container, key)
298
+ return value.to_i if value
299
+ end
300
+ 0
301
+ end
302
+
303
+ def regular_input_tokens(input_tokens, cache_read, audio_input, image_input)
304
+ [input_tokens.to_i - cache_read - audio_input - image_input, 0].max
305
+ end
306
+
307
+ def split_responses_image_output(output_tokens:, image_output_details:, text_output_details:, audio_output:,
308
+ default_to_image: false)
309
+ if image_output_details.zero? && text_output_details.zero?
310
+ remainder = [output_tokens - audio_output, 0].max
311
+ return default_to_image ? [remainder, 0] : [0, remainder]
312
+ end
313
+
314
+ text_output = text_output_details
315
+ text_output = [output_tokens - image_output_details - audio_output, 0].max if text_output.zero?
316
+ [image_output_details, text_output]
109
317
  end
110
318
  end
111
319
 
112
320
  module ResponsesPatch
113
321
  def create(*args, **kwargs)
114
- started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
115
322
  LlmCostTracker::Integrations::Openai.enforce_budget!
116
- response = super
323
+ started_at = LlmCostTracker::Timing.now_monotonic
324
+ response = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
117
325
  LlmCostTracker::Integrations::Openai.record_response(
118
326
  response,
119
327
  request: LlmCostTracker::Integrations::Openai.request_params(args, kwargs),
120
- latency_ms: LlmCostTracker::Integrations::Openai.elapsed_ms(started_at)
328
+ latency_ms: LlmCostTracker::Integrations::Openai.elapsed_ms(started_at),
329
+ host: LlmCostTracker::Integrations::Openai.client_host_for(self)
121
330
  )
122
331
  response
123
332
  end
124
333
 
125
334
  def stream(*args, **kwargs)
126
335
  request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
127
- collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
128
336
  LlmCostTracker::Integrations::Openai.enforce_budget!
129
- stream = super
337
+ host = LlmCostTracker::Integrations::Openai.client_host_for(self)
338
+ collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
339
+ stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
130
340
  LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
131
341
  end
132
342
 
133
343
  def stream_raw(*args, **kwargs)
134
344
  request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
135
- collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
136
345
  LlmCostTracker::Integrations::Openai.enforce_budget!
137
- stream = super
346
+ host = LlmCostTracker::Integrations::Openai.client_host_for(self)
347
+ collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
348
+ stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
138
349
  LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
139
350
  end
140
351
 
141
352
  def retrieve_streaming(response_id, *args, **kwargs)
142
353
  request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
143
- collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
144
- collector.provider_response_id = response_id
145
354
  LlmCostTracker::Integrations::Openai.enforce_budget!
146
- stream = super
355
+ host = LlmCostTracker::Integrations::Openai.client_host_for(self)
356
+ collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
357
+ collector.provider_response_id = response_id
358
+ stream = super(response_id, *LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
147
359
  LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
148
360
  end
149
361
  end
150
362
 
151
363
  module ChatCompletionsPatch
152
364
  def create(*args, **kwargs)
153
- started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
154
365
  LlmCostTracker::Integrations::Openai.enforce_budget!
155
- response = super
366
+ started_at = LlmCostTracker::Timing.now_monotonic
367
+ response = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
156
368
  LlmCostTracker::Integrations::Openai.record_response(
157
369
  response,
158
370
  request: LlmCostTracker::Integrations::Openai.request_params(args, kwargs),
159
- latency_ms: LlmCostTracker::Integrations::Openai.elapsed_ms(started_at)
371
+ latency_ms: LlmCostTracker::Integrations::Openai.elapsed_ms(started_at),
372
+ host: LlmCostTracker::Integrations::Openai.client_host_for(self)
160
373
  )
161
374
  response
162
375
  end
163
376
 
377
+ def stream(*args, **kwargs)
378
+ request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
379
+ LlmCostTracker::Integrations::Openai.enforce_budget!
380
+ host = LlmCostTracker::Integrations::Openai.client_host_for(self)
381
+ collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
382
+ stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
383
+ LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
384
+ end
385
+
164
386
  def stream_raw(*args, **kwargs)
165
387
  request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
166
- collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
167
388
  LlmCostTracker::Integrations::Openai.enforce_budget!
168
- stream = super
389
+ host = LlmCostTracker::Integrations::Openai.client_host_for(self)
390
+ collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
391
+ stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
392
+ LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
393
+ end
394
+ end
395
+
396
+ module PatchBuilder
397
+ module_function
398
+
399
+ def build(record_method:, methods:)
400
+ Module.new.tap do |mod|
401
+ methods.each { |method_name| define_wrapped_method(mod, method_name, record_method) }
402
+ end
403
+ end
404
+
405
+ def define_wrapped_method(mod, method_name, record_method)
406
+ mod.define_method(method_name) do |*args, **kwargs, &block|
407
+ integration = LlmCostTracker::Integrations::Openai
408
+ integration.enforce_budget!
409
+ started_at = LlmCostTracker::Timing.now_monotonic
410
+ response = super(*integration.normalize_sdk_args(args, kwargs), &block)
411
+ integration.public_send(
412
+ record_method, response,
413
+ request: integration.request_params(args, kwargs),
414
+ latency_ms: integration.elapsed_ms(started_at)
415
+ )
416
+ response
417
+ end
418
+ end
419
+ end
420
+
421
+ EmbeddingsPatch = PatchBuilder.build(record_method: :record_response, methods: %i[create])
422
+ ImagesPatch = PatchBuilder.build(record_method: :record_image, methods: %i[generate edit create_variation])
423
+ TranscriptionsPatch = PatchBuilder.build(record_method: :record_transcription, methods: %i[create])
424
+ TranslationsPatch = PatchBuilder.build(record_method: :record_transcription, methods: %i[create])
425
+ SpeechPatch = PatchBuilder.build(record_method: :record_speech, methods: %i[create])
426
+ ModerationsPatch = PatchBuilder.build(record_method: :record_moderation, methods: %i[create])
427
+
428
+ module StreamingImagesPatch
429
+ %i[generate_stream_raw edit_stream_raw].each do |method_name|
430
+ define_method(method_name) do |*args, **kwargs|
431
+ request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
432
+ LlmCostTracker::Integrations::Openai.enforce_budget!
433
+ host = LlmCostTracker::Integrations::Openai.client_host_for(self)
434
+ collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
435
+ stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
436
+ LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
437
+ end
438
+ end
439
+ end
440
+
441
+ module StreamingTranscriptionsPatch
442
+ def create_streaming(*args, **kwargs)
443
+ request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
444
+ LlmCostTracker::Integrations::Openai.enforce_budget!
445
+ host = LlmCostTracker::Integrations::Openai.client_host_for(self)
446
+ collector = LlmCostTracker::Integrations::Openai.stream_collector(request, host: host)
447
+ stream = super(*LlmCostTracker::Integrations::Openai.normalize_sdk_args(args, kwargs))
169
448
  LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
170
449
  end
171
450
  end