llm_cost_tracker 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +136 -0
  3. data/README.md +14 -6
  4. data/app/assets/llm_cost_tracker/application.css +65 -5
  5. data/app/controllers/llm_cost_tracker/application_controller.rb +25 -33
  6. data/app/controllers/llm_cost_tracker/assets_controller.rb +1 -1
  7. data/app/controllers/llm_cost_tracker/calls_controller.rb +21 -11
  8. data/app/controllers/llm_cost_tracker/data_quality_controller.rb +4 -0
  9. data/app/controllers/llm_cost_tracker/reconciliation_controller.rb +106 -0
  10. data/app/controllers/llm_cost_tracker/tags_controller.rb +15 -1
  11. data/app/helpers/llm_cost_tracker/application_helper.rb +11 -1
  12. data/app/helpers/llm_cost_tracker/inline_style_helper.rb +28 -0
  13. data/app/helpers/llm_cost_tracker/reconciliation_helper.rb +13 -0
  14. data/app/helpers/llm_cost_tracker/token_usage_helper.rb +5 -1
  15. data/app/models/llm_cost_tracker/call.rb +0 -3
  16. data/app/models/llm_cost_tracker/call_line_item.rb +1 -5
  17. data/app/models/llm_cost_tracker/call_rollup.rb +0 -3
  18. data/app/models/llm_cost_tracker/call_tag.rb +0 -4
  19. data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +0 -4
  20. data/app/models/llm_cost_tracker/ingestion/lease.rb +0 -3
  21. data/app/models/llm_cost_tracker/provider_invoice.rb +7 -3
  22. data/app/models/llm_cost_tracker/provider_invoice_import.rb +29 -0
  23. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +33 -4
  24. data/app/services/llm_cost_tracker/dashboard/filter.rb +6 -4
  25. data/app/services/llm_cost_tracker/dashboard/setup_state.rb +110 -0
  26. data/app/views/layouts/llm_cost_tracker/application.html.erb +6 -1
  27. data/app/views/llm_cost_tracker/calls/show.html.erb +26 -41
  28. data/app/views/llm_cost_tracker/dashboard/index.html.erb +9 -9
  29. data/app/views/llm_cost_tracker/data_quality/index.html.erb +92 -53
  30. data/app/views/llm_cost_tracker/reconciliation/index.html.erb +183 -0
  31. data/app/views/llm_cost_tracker/shared/_bar.html.erb +1 -1
  32. data/app/views/llm_cost_tracker/shared/_filters.html.erb +3 -0
  33. data/app/views/llm_cost_tracker/shared/_metric_stack.html.erb +1 -1
  34. data/app/views/llm_cost_tracker/tags/show.html.erb +60 -0
  35. data/config/routes.rb +3 -2
  36. data/lib/llm_cost_tracker/billing/components.rb +45 -3
  37. data/lib/llm_cost_tracker/billing/components.yml +71 -0
  38. data/lib/llm_cost_tracker/billing/cost_status.rb +21 -25
  39. data/lib/llm_cost_tracker/billing/line_item.rb +16 -50
  40. data/lib/llm_cost_tracker/budget.rb +31 -7
  41. data/lib/llm_cost_tracker/capture/stream_collector.rb +113 -34
  42. data/lib/llm_cost_tracker/capture/stream_tracker.rb +40 -5
  43. data/lib/llm_cost_tracker/configuration.rb +72 -17
  44. data/lib/llm_cost_tracker/doctor/capture_verifier.rb +1 -1
  45. data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +2 -0
  46. data/lib/llm_cost_tracker/doctor/ingestion_check.rb +30 -4
  47. data/lib/llm_cost_tracker/doctor/invoice_reconciliation_check.rb +164 -0
  48. data/lib/llm_cost_tracker/doctor/legacy_audit_check.rb +0 -2
  49. data/lib/llm_cost_tracker/doctor/legacy_billing_status_check.rb +0 -2
  50. data/lib/llm_cost_tracker/doctor/schema_check.rb +5 -2
  51. data/lib/llm_cost_tracker/doctor.rb +72 -14
  52. data/lib/llm_cost_tracker/engine.rb +8 -0
  53. data/lib/llm_cost_tracker/errors.rb +3 -2
  54. data/lib/llm_cost_tracker/event.rb +48 -1
  55. data/lib/llm_cost_tracker/generators/llm_cost_tracker/async_ingestion_generator.rb +43 -0
  56. data/lib/llm_cost_tracker/generators/llm_cost_tracker/call_rollups_generator.rb +43 -0
  57. data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +17 -26
  58. data/lib/llm_cost_tracker/generators/llm_cost_tracker/reconciliation_generator.rb +34 -0
  59. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_async_ingestion.rb.erb +29 -0
  60. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_call_rollups.rb.erb +15 -0
  61. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +5 -58
  62. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_reconciliation.rb.erb +60 -0
  63. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +35 -25
  64. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_rollups_provider.rb.erb +35 -0
  65. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_tags_key_value_index.rb.erb +32 -0
  66. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_image_tokens.rb.erb +18 -0
  67. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_provider_invoice_imports_provider.rb.erb +32 -0
  68. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_provider_invoices_metadata_index.rb.erb +25 -0
  69. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_rollups_provider_generator.rb +29 -0
  70. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_tags_key_value_index_generator.rb +30 -0
  71. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_image_tokens_generator.rb +29 -0
  72. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_provider_invoice_imports_provider_generator.rb +31 -0
  73. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_provider_invoices_metadata_index_generator.rb +31 -0
  74. data/lib/llm_cost_tracker/ingestion/batch.rb +5 -2
  75. data/lib/llm_cost_tracker/ingestion/inbox.rb +3 -25
  76. data/lib/llm_cost_tracker/ingestion/pool.rb +44 -0
  77. data/lib/llm_cost_tracker/ingestion/worker.rb +28 -34
  78. data/lib/llm_cost_tracker/ingestion.rb +48 -11
  79. data/lib/llm_cost_tracker/integrations/anthropic.rb +31 -26
  80. data/lib/llm_cost_tracker/integrations/base.rb +35 -15
  81. data/lib/llm_cost_tracker/integrations/openai.rb +345 -84
  82. data/lib/llm_cost_tracker/integrations/ruby_llm.rb +111 -14
  83. data/lib/llm_cost_tracker/integrations.rb +33 -14
  84. data/lib/llm_cost_tracker/ledger/period/totals.rb +25 -7
  85. data/lib/llm_cost_tracker/ledger/rollups.rb +22 -17
  86. data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +41 -1
  87. data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +16 -6
  88. data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +28 -2
  89. data/lib/llm_cost_tracker/ledger/schema/calls.rb +2 -4
  90. data/lib/llm_cost_tracker/ledger/schema/ingestion_inbox_entries.rb +57 -0
  91. data/lib/llm_cost_tracker/ledger/schema/ingestion_leases.rb +52 -0
  92. data/lib/llm_cost_tracker/ledger/schema/provider_invoice_imports.rb +56 -0
  93. data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +28 -13
  94. data/lib/llm_cost_tracker/ledger/store.rb +34 -31
  95. data/lib/llm_cost_tracker/ledger/tags/encoding.rb +37 -0
  96. data/lib/llm_cost_tracker/ledger/tags/query.rb +2 -2
  97. data/lib/llm_cost_tracker/ledger.rb +2 -1
  98. data/lib/llm_cost_tracker/logging.rb +0 -4
  99. data/lib/llm_cost_tracker/masking.rb +39 -0
  100. data/lib/llm_cost_tracker/middleware/faraday.rb +120 -33
  101. data/lib/llm_cost_tracker/parsers/anthropic.rb +36 -28
  102. data/lib/llm_cost_tracker/parsers/azure.rb +46 -0
  103. data/lib/llm_cost_tracker/parsers/base.rb +53 -43
  104. data/lib/llm_cost_tracker/parsers/gemini.rb +24 -22
  105. data/lib/llm_cost_tracker/parsers/openai.rb +20 -38
  106. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +26 -39
  107. data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +81 -13
  108. data/lib/llm_cost_tracker/parsers/openai_usage.rb +126 -59
  109. data/lib/llm_cost_tracker/parsers.rb +31 -4
  110. data/lib/llm_cost_tracker/prices.json +572 -493
  111. data/lib/llm_cost_tracker/pricing/backfill.rb +140 -0
  112. data/lib/llm_cost_tracker/pricing/effective_prices.rb +7 -40
  113. data/lib/llm_cost_tracker/pricing/estimator.rb +33 -0
  114. data/lib/llm_cost_tracker/pricing/explainer.rb +4 -1
  115. data/lib/llm_cost_tracker/pricing/lookup.rb +73 -5
  116. data/lib/llm_cost_tracker/pricing/mode.rb +76 -0
  117. data/lib/llm_cost_tracker/pricing/registry.rb +3 -8
  118. data/lib/llm_cost_tracker/pricing/service_charges.rb +14 -12
  119. data/lib/llm_cost_tracker/pricing/{sync_change_printer.rb → sync/change_printer.rb} +3 -3
  120. data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +62 -1
  121. data/lib/llm_cost_tracker/pricing/sync.rb +4 -10
  122. data/lib/llm_cost_tracker/pricing/unknown.rb +5 -2
  123. data/lib/llm_cost_tracker/pricing.rb +117 -44
  124. data/lib/llm_cost_tracker/providers/anthropic/tier_classification.rb +22 -0
  125. data/lib/llm_cost_tracker/providers/azure/hosts.rb +17 -0
  126. data/lib/llm_cost_tracker/providers/gemini/model_families.rb +17 -0
  127. data/lib/llm_cost_tracker/providers/openai/hosts.rb +35 -0
  128. data/lib/llm_cost_tracker/providers/openai/model_families.rb +51 -0
  129. data/lib/llm_cost_tracker/railtie.rb +8 -0
  130. data/lib/llm_cost_tracker/reconcile_tasks.rb +134 -0
  131. data/lib/llm_cost_tracker/reconciliation/diff.rb +409 -0
  132. data/lib/llm_cost_tracker/reconciliation/diff_result.rb +44 -0
  133. data/lib/llm_cost_tracker/reconciliation/import_result.rb +19 -0
  134. data/lib/llm_cost_tracker/reconciliation/importer.rb +254 -0
  135. data/lib/llm_cost_tracker/reconciliation/sources/anthropic_usage.rb +172 -0
  136. data/lib/llm_cost_tracker/reconciliation/sources/fingerprint.rb +20 -0
  137. data/lib/llm_cost_tracker/reconciliation/sources/openai_usage.rb +142 -0
  138. data/lib/llm_cost_tracker/reconciliation.rb +118 -0
  139. data/lib/llm_cost_tracker/report/data.rb +4 -1
  140. data/lib/llm_cost_tracker/report.rb +0 -4
  141. data/lib/llm_cost_tracker/retention.rb +31 -6
  142. data/lib/llm_cost_tracker/tags/context.rb +3 -4
  143. data/lib/llm_cost_tracker/tags/sanitizer.rb +73 -21
  144. data/lib/llm_cost_tracker/token_usage.rb +14 -2
  145. data/lib/llm_cost_tracker/tracker.rb +41 -55
  146. data/lib/llm_cost_tracker/version.rb +1 -1
  147. data/lib/llm_cost_tracker.rb +19 -14
  148. data/lib/tasks/llm_cost_tracker.rake +41 -4
  149. metadata +49 -3
  150. data/lib/llm_cost_tracker/usage_capture.rb +0 -58
@@ -68,6 +68,26 @@
68
68
  token_key: audio_output_tokens
69
69
  cost_key: audio_output_cost
70
70
 
71
+ - key: image_input
72
+ kind: image_token
73
+ direction: input
74
+ modality: image
75
+ cache_state: none
76
+ unit: token
77
+ category: token
78
+ token_key: image_input_tokens
79
+ cost_key: image_input_cost
80
+
81
+ - key: image_output
82
+ kind: image_token
83
+ direction: output
84
+ modality: image
85
+ cache_state: none
86
+ unit: token
87
+ category: token
88
+ token_key: image_output_tokens
89
+ cost_key: image_output_cost
90
+
71
91
  - key: web_search_request
72
92
  kind: web_search_request
73
93
  direction: neither
@@ -75,6 +95,34 @@
75
95
  cache_state: none
76
96
  unit: request
77
97
  category: tool
98
+ rate_basis: per_1k_requests
99
+
100
+ - key: web_search_preview_request_reasoning
101
+ kind: web_search_preview_request_reasoning
102
+ direction: neither
103
+ modality: text
104
+ cache_state: none
105
+ unit: request
106
+ category: tool
107
+ rate_basis: per_1k_requests
108
+
109
+ - key: web_search_preview_request_non_reasoning
110
+ kind: web_search_preview_request_non_reasoning
111
+ direction: neither
112
+ modality: text
113
+ cache_state: none
114
+ unit: request
115
+ category: tool
116
+ rate_basis: per_1k_requests
117
+
118
+ - key: web_fetch_request
119
+ kind: web_fetch_request
120
+ direction: neither
121
+ modality: text
122
+ cache_state: none
123
+ unit: request
124
+ category: tool
125
+ rate_basis: per_1k_requests
78
126
 
79
127
  - key: file_search_call
80
128
  kind: file_search_call
@@ -83,6 +131,7 @@
83
131
  cache_state: none
84
132
  unit: request
85
133
  category: tool
134
+ rate_basis: per_1k_requests
86
135
 
87
136
  - key: container_session
88
137
  kind: container_session
@@ -91,6 +140,7 @@
91
140
  cache_state: none
92
141
  unit: session
93
142
  category: runtime
143
+ rate_basis: per_session
94
144
 
95
145
  - key: code_execution_request
96
146
  kind: code_execution_request
@@ -99,6 +149,7 @@
99
149
  cache_state: none
100
150
  unit: request
101
151
  category: runtime
152
+ rate_basis: per_1k_requests
102
153
 
103
154
  - key: code_execution_hour
104
155
  kind: code_execution_hour
@@ -107,6 +158,7 @@
107
158
  cache_state: none
108
159
  unit: hour
109
160
  category: runtime
161
+ rate_basis: per_hour
110
162
 
111
163
  - key: grounding_request
112
164
  kind: grounding_request
@@ -115,3 +167,22 @@
115
167
  cache_state: none
116
168
  unit: request
117
169
  category: tool
170
+ rate_basis: per_1k_requests
171
+
172
+ - key: text_to_speech_character
173
+ kind: text_to_speech_character
174
+ direction: output
175
+ modality: audio
176
+ cache_state: none
177
+ unit: character
178
+ category: tool
179
+ rate_basis: per_million_characters
180
+
181
+ - key: mcp_call
182
+ kind: mcp_call
183
+ direction: neither
184
+ modality: text
185
+ cache_state: none
186
+ unit: request
187
+ category: tool
188
+ rate_basis: per_request
@@ -10,36 +10,32 @@ module LlmCostTracker
10
10
  PARTIAL = "partial"
11
11
  UNKNOWN = "unknown"
12
12
 
13
- class << self
14
- # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
15
- def call(token_usage:, usage_source:, token_cost:, service_line_items:, total_cost:,
16
- token_pricing_partial: false)
17
- return UNKNOWN if usage_source == :unknown
13
+ # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
14
+ def self.call(token_usage:, usage_source:, token_cost:, service_line_items:, total_cost:,
15
+ token_pricing_partial: false)
16
+ return UNKNOWN if usage_source == :unknown
18
17
 
19
- token_billable = Components::TOKEN_PRICED.any? do |component|
20
- token_usage.public_send(component.token_key).positive?
21
- end
22
- service_billable = false
23
- service_priced = false
24
- service_unpriced = false
25
- service_line_items.each do |line_item|
26
- next unless line_item.billable?
18
+ token_billable = token_usage.priced_quantities.any? { |_key, quantity| quantity.positive? }
19
+ service_billable = false
20
+ service_priced = false
21
+ service_unpriced = false
22
+ service_line_items.each do |line_item|
23
+ next unless line_item.billable?
27
24
 
28
- service_billable = true
29
- service_priced ||= line_item.priced?
30
- service_unpriced ||= line_item.unpriced?
31
- break if service_priced && service_unpriced
32
- end
25
+ service_billable = true
26
+ service_priced ||= line_item.priced?
27
+ service_unpriced ||= line_item.unpriced?
28
+ break if service_priced && service_unpriced
29
+ end
33
30
 
34
- priced = (token_billable && !token_cost.nil?) || service_priced || (!token_billable && !service_billable)
35
- unpriced = (token_billable && (token_cost.nil? || token_pricing_partial)) || service_unpriced
36
- return UNKNOWN if unpriced && !priced
37
- return PARTIAL if unpriced
31
+ priced = (token_billable && !token_cost.nil?) || service_priced || (!token_billable && !service_billable)
32
+ unpriced = (token_billable && (token_cost.nil? || token_pricing_partial)) || service_unpriced
33
+ return UNKNOWN if unpriced && !priced
34
+ return PARTIAL if unpriced
38
35
 
39
- total_cost.nil? || total_cost.zero? ? FREE : COMPLETE
40
- end
41
- # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
36
+ total_cost.nil? || total_cost.zero? ? FREE : COMPLETE
42
37
  end
38
+ # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
43
39
  end
44
40
  end
45
41
  end
@@ -30,28 +30,11 @@ module LlmCostTracker
30
30
 
31
31
  class LineItem
32
32
  USD = "USD"
33
- OPTIONAL_ATTRIBUTES = %i[
34
- pricing_basis
35
- price_key
36
- price_source
37
- price_source_version
38
- provider_field
39
- provider_item_id
40
- ].freeze
41
- SYMBOL_ATTRIBUTES = %i[
42
- kind
43
- direction
44
- modality
45
- cache_state
46
- unit
47
- pricing_basis
48
- price_source
49
- ].freeze
50
33
 
51
34
  def self.build(attributes)
52
35
  attributes = attributes.to_h
53
36
  component = component_for(attributes)
54
- normalized = {
37
+ new(
55
38
  kind: symbol_or_nil(attributes[:kind]) || component&.kind,
56
39
  direction: symbol_or_nil(attributes[:direction]) || component&.direction,
57
40
  modality: symbol_or_nil(attributes[:modality]) || component&.modality,
@@ -63,38 +46,30 @@ module LlmCostTracker
63
46
  cost: decimal_or_nil(attributes[:cost]),
64
47
  currency: attributes[:currency] || USD,
65
48
  cost_status: cost_status_for(attributes),
49
+ pricing_basis: symbol_or_nil(attributes[:pricing_basis]),
50
+ price_key: attributes[:price_key],
51
+ price_source: symbol_or_nil(attributes[:price_source]),
52
+ price_source_version: attributes[:price_source_version],
53
+ provider_field: attributes[:provider_field],
54
+ provider_item_id: attributes[:provider_item_id],
66
55
  details: attributes[:details] || {}
67
- }.merge(optional_attributes_for(attributes))
68
-
69
- new(**normalized)
56
+ )
70
57
  end
71
58
 
72
59
  def self.from_token_usage(token_usage)
73
60
  return [] unless token_usage
74
61
 
75
- Components::TOKEN_PRICED.filter_map do |component|
76
- quantity = token_usage.public_send(component.token_key)
62
+ token_usage.priced_quantities.filter_map do |key, quantity|
77
63
  next unless quantity.positive?
78
64
 
79
- new(
65
+ component = Components::BY_KEY.fetch(key)
66
+ build(
80
67
  kind: component.kind,
81
68
  direction: component.direction,
82
69
  modality: component.modality,
83
70
  cache_state: component.cache_state,
84
- quantity: BigDecimal(quantity.to_s),
85
- unit: component.unit,
86
- rate_amount: nil,
87
- rate_quantity: BigDecimal("1"),
88
- cost: nil,
89
- currency: USD,
90
- cost_status: CostStatus::UNKNOWN,
91
- pricing_basis: nil,
92
- price_key: nil,
93
- price_source: nil,
94
- price_source_version: nil,
95
- provider_field: nil,
96
- provider_item_id: nil,
97
- details: {}
71
+ quantity: quantity,
72
+ unit: component.unit
98
73
  )
99
74
  end
100
75
  end
@@ -119,7 +94,7 @@ module LlmCostTracker
119
94
  def self.symbol_or_nil(value)
120
95
  return nil if value.nil?
121
96
 
122
- value.is_a?(Symbol) ? value : value.to_s.to_sym
97
+ value.to_s.to_sym
123
98
  end
124
99
 
125
100
  def self.decimal_or_nil(value)
@@ -132,16 +107,7 @@ module LlmCostTracker
132
107
  decimal_or_nil(value) || BigDecimal("0")
133
108
  end
134
109
 
135
- def self.optional_attributes_for(attributes)
136
- OPTIONAL_ATTRIBUTES.to_h do |key|
137
- value = attributes[key]
138
- value = value.to_sym if value.is_a?(String) && SYMBOL_ATTRIBUTES.include?(key)
139
- [key, value]
140
- end
141
- end
142
-
143
- private_class_method :cost_status_for, :component_for, :symbol_or_nil, :decimal_or_nil, :decimal_or_zero,
144
- :optional_attributes_for
110
+ private_class_method :cost_status_for, :component_for, :symbol_or_nil, :decimal_or_nil, :decimal_or_zero
145
111
 
146
112
  def billable?
147
113
  quantity.positive?
@@ -163,7 +129,7 @@ module LlmCostTracker
163
129
  cost || BigDecimal("0")
164
130
  end
165
131
 
166
- def apply_rate(rate)
132
+ def with_rate(rate)
167
133
  rate_amount = rate.fetch(:amount)
168
134
  rate_quantity = rate.fetch(:quantity)
169
135
  applied_cost = (quantity / rate_quantity) * rate_amount
@@ -1,26 +1,35 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bigdecimal"
4
+
3
5
  require_relative "logging"
4
6
  require_relative "ledger"
7
+ require_relative "pricing/estimator"
5
8
 
6
9
  module LlmCostTracker
7
10
  class Budget
8
11
  BUDGET_TYPE_TO_PERIOD = { monthly: :month, daily: :day }.freeze
9
12
 
10
13
  class << self
11
- def enforce!
14
+ def enforce!(provider: nil, model: nil, request: nil)
12
15
  config = LlmCostTracker.configuration
13
16
  return unless config.budget_exceeded_behavior == :block_requests
14
17
 
18
+ estimate = estimate_cost(provider: provider, model: model, request: request)
19
+ raise_per_call_pre_send(estimate, config.per_call_budget) if config.per_call_budget && estimate.positive?
20
+
15
21
  budgets = { monthly: config.monthly_budget, daily: config.daily_budget }.compact
16
22
  return if budgets.empty?
17
23
 
18
24
  totals = totals_for(budgets.keys, time: Time.now.utc)
19
25
 
20
26
  budgets.each do |budget_type, budget|
21
- total = totals.fetch(budget_type)
27
+ total = totals.fetch(budget_type) + estimate
28
+ next unless total >= budget
22
29
 
23
- handle_exceeded(budget_type: budget_type, total: total, budget: budget) if total >= budget
30
+ raise BudgetExceededError.new(**budget_payload(
31
+ budget_type: budget_type, total: total, budget: budget, last_event: nil, stage: :pre_send
32
+ ))
24
33
  end
25
34
  end
26
35
 
@@ -41,6 +50,20 @@ module LlmCostTracker
41
50
 
42
51
  private
43
52
 
53
+ def estimate_cost(provider:, model:, request:)
54
+ return BigDecimal("0") unless provider && model && request
55
+
56
+ Pricing::Estimator.call(provider: provider, model: model, request: request) || BigDecimal("0")
57
+ end
58
+
59
+ def raise_per_call_pre_send(estimate, budget)
60
+ return unless estimate >= budget
61
+
62
+ raise BudgetExceededError.new(**budget_payload(
63
+ budget_type: :per_call, total: estimate, budget: budget, last_event: nil, stage: :pre_send
64
+ ))
65
+ end
66
+
44
67
  def check_per_call_budget(event, config)
45
68
  budget = config.per_call_budget
46
69
  return unless budget
@@ -67,7 +90,8 @@ module LlmCostTracker
67
90
  budget_type: budget_type,
68
91
  total: total,
69
92
  budget: budget,
70
- last_event: last_event
93
+ last_event: last_event,
94
+ stage: :post_spend
71
95
  )
72
96
 
73
97
  if notify_exceeded?(config, budget_type: budget_type, total: total, budget: budget, last_event: last_event)
@@ -76,18 +100,18 @@ module LlmCostTracker
76
100
  raise BudgetExceededError.new(**payload) if %i[raise block_requests].include?(config.budget_exceeded_behavior)
77
101
  end
78
102
 
79
- def budget_payload(budget_type:, total:, budget:, last_event:)
103
+ def budget_payload(budget_type:, total:, budget:, last_event:, stage:)
80
104
  {
81
105
  budget_type: budget_type,
82
106
  total: total,
83
107
  budget: budget,
84
- last_event: last_event
108
+ last_event: last_event,
109
+ stage: stage
85
110
  }
86
111
  end
87
112
 
88
113
  def notify_exceeded?(config, budget_type:, total:, budget:, last_event:)
89
114
  return false unless config.on_budget_exceeded
90
- return true unless config.budget_exceeded_behavior == :notify
91
115
  return true unless last_event&.total_cost
92
116
  return true if budget_type == :per_call
93
117
 
@@ -5,6 +5,7 @@ require "active_support/core_ext/object/deep_dup"
5
5
  require "json"
6
6
 
7
7
  require_relative "stream"
8
+ require_relative "../pricing/mode"
8
9
  require_relative "../timing"
9
10
 
10
11
  module LlmCostTracker
@@ -14,7 +15,7 @@ module LlmCostTracker
14
15
 
15
16
  def initialize(provider:, model:, latency_ms: nil, provider_response_id: nil, provider_project_id: nil,
16
17
  provider_api_key_id: nil, provider_workspace_id: nil, batch: nil, pricing_mode: nil,
17
- metadata: {}, context_tags: nil)
18
+ metadata: {}, context_tags: nil, request: nil)
18
19
  @provider = provider.to_s
19
20
  @model = model
20
21
  @latency_ms = latency_ms
@@ -26,12 +27,14 @@ module LlmCostTracker
26
27
  @pricing_mode = pricing_mode
27
28
  @metadata = (metadata || {}).deep_dup
28
29
  @context_tags = (context_tags || LlmCostTracker::Tags::Context.tags).deep_dup
30
+ @request = request
29
31
  @events = []
30
32
  @captured_bytes = 0
31
33
  @overflowed = false
32
34
  @explicit_usage = nil
33
35
  @started_at = LlmCostTracker::Timing.now_monotonic
34
36
  @finished = false
37
+ @recording = false
35
38
  @mutex = Mutex.new
36
39
  end
37
40
 
@@ -66,7 +69,6 @@ module LlmCostTracker
66
69
  ensure_open!
67
70
  capture_event(data, type: type) unless data.nil?
68
71
  end
69
- self
70
72
  end
71
73
 
72
74
  def usage(input_tokens:, output_tokens:, **extra)
@@ -84,14 +86,22 @@ module LlmCostTracker
84
86
  output_tokens: output_tokens
85
87
  )
86
88
  end
87
- self
88
89
  end
89
90
 
90
91
  def finish!(errored: false)
91
- snapshot = @mutex.synchronize do
92
- return if @finished
92
+ snapshot = claim_recording_slot
93
+ return if snapshot.nil?
93
94
 
94
- @finished = true
95
+ record_snapshot(snapshot, errored: errored)
96
+ end
97
+
98
+ private
99
+
100
+ def claim_recording_slot
101
+ @mutex.synchronize do
102
+ return nil if @finished || @recording
103
+
104
+ @recording = true
95
105
  pricing_mode = Pricing.normalize_mode(@pricing_mode)
96
106
  {
97
107
  events: @events.dup,
@@ -103,27 +113,50 @@ module LlmCostTracker
103
113
  capture_dimensions: capture_dimensions(pricing_mode),
104
114
  pricing_mode: pricing_mode,
105
115
  metadata: @metadata.deep_dup,
106
- context_tags: @context_tags.deep_dup
116
+ context_tags: @context_tags.deep_dup,
117
+ request: @request
107
118
  }
108
119
  end
120
+ end
109
121
 
110
- capture = build_usage_capture(snapshot)
111
- provider_response_id = capture.provider_response_id || snapshot[:provider_response_id]
112
- capture = capture.with(provider_response_id: provider_response_id)
122
+ def record_snapshot(snapshot, errored:)
123
+ save_succeeded = false
124
+ begin
125
+ event = build_event(snapshot)
126
+ provider_response_id = event.provider_response_id || snapshot[:provider_response_id]
127
+ event = event.with(provider_response_id: provider_response_id)
113
128
 
114
- Tracker.record(
115
- capture: capture,
116
- latency_ms: snapshot[:latency_ms] || LlmCostTracker::Timing.elapsed_ms(@started_at),
117
- pricing_mode: snapshot[:pricing_mode],
118
- metadata: (errored ? { stream_errored: true } : {}).merge(snapshot[:metadata]),
119
- context_tags: snapshot[:context_tags]
120
- )
129
+ Tracker.record(
130
+ event: event,
131
+ latency_ms: snapshot[:latency_ms] || LlmCostTracker::Timing.elapsed_ms(@started_at),
132
+ pricing_mode: merge_pricing_modes(event.pricing_mode, snapshot[:pricing_mode]),
133
+ metadata: (errored ? { stream_errored: true } : {}).merge(snapshot[:metadata]),
134
+ context_tags: snapshot[:context_tags]
135
+ ) { save_succeeded = true }
136
+ ensure
137
+ @mutex.synchronize do
138
+ @finished = save_succeeded
139
+ @recording = false
140
+ end
141
+ end
121
142
  end
122
143
 
123
- private
144
+ HOST_DERIVED_MODE_TOKENS = %i[data_residency].freeze
145
+ private_constant :HOST_DERIVED_MODE_TOKENS
146
+
147
+ def merge_pricing_modes(provider_mode, request_mode)
148
+ return Pricing.normalize_mode(request_mode) if provider_mode.to_s.strip.empty?
149
+
150
+ provider_tokens = Pricing::Mode.tokenize(provider_mode) - Pricing::STANDARD_MODE_VALUES
151
+ request_host_tokens = Pricing::Mode.tokenize(request_mode || "") & HOST_DERIVED_MODE_TOKENS
152
+ combined = provider_tokens | request_host_tokens
153
+ return nil if combined.empty?
154
+
155
+ Pricing.normalize_mode(combined.join("_"))
156
+ end
124
157
 
125
158
  def capture_dimensions(pricing_mode)
126
- batch = @batch.nil? ? UsageCapture.batch_from_pricing_mode?(pricing_mode).presence : @batch
159
+ batch = @batch.nil? ? Event.batch_from_pricing_mode?(pricing_mode).presence : @batch
127
160
  {
128
161
  provider_project_id: @provider_project_id.to_s.strip.presence,
129
162
  provider_api_key_id: @provider_api_key_id.to_s.strip.presence,
@@ -138,34 +171,44 @@ module LlmCostTracker
138
171
  raise FrozenError, "can't modify finished LlmCostTracker::Capture::StreamCollector"
139
172
  end
140
173
 
141
- def build_usage_capture(snapshot)
174
+ def build_event(snapshot)
142
175
  return build_from_explicit_usage(snapshot) if snapshot[:explicit_usage]
176
+ return build_unknown_usage(snapshot) if snapshot[:overflowed]
143
177
 
144
- capture = Parsers.find_for_provider(@provider)&.parse_stream(
178
+ event = Parsers.find_for_provider(@provider)&.parse_stream(
145
179
  response_status: 200,
146
- events: snapshot[:events]
180
+ events: snapshot[:events],
181
+ request_body: request_body_for(snapshot[:request])
147
182
  )
148
- if capture && (capture.usage_source != :unknown || !snapshot[:overflowed])
149
- model = present_model(capture.model) || present_model(snapshot[:model]) || UsageCapture::UNKNOWN_MODEL
150
- return capture.with(provider: @provider, model: model, **snapshot.fetch(:capture_dimensions))
183
+ if event
184
+ model = present_model(event.model) || present_model(snapshot[:model]) || Event::UNKNOWN_MODEL
185
+ return event.with(provider: @provider, model: model, **snapshot.fetch(:capture_dimensions))
151
186
  end
152
187
 
153
188
  build_unknown_usage(snapshot)
154
189
  end
155
190
 
191
+ def request_body_for(request)
192
+ return nil unless request
193
+
194
+ JSON.generate(request)
195
+ rescue StandardError
196
+ nil
197
+ end
198
+
156
199
  def present_model(value)
157
200
  return nil if value.nil?
158
201
 
159
202
  string = value.to_s.presence
160
- return nil if string.nil? || string == "unknown"
203
+ return nil if string.nil? || string == Event::UNKNOWN_MODEL
161
204
 
162
205
  string
163
206
  end
164
207
 
165
208
  def build_from_explicit_usage(snapshot)
166
- UsageCapture.build(
209
+ Event.build(
167
210
  provider: @provider,
168
- model: snapshot[:model] || UsageCapture::UNKNOWN_MODEL,
211
+ model: snapshot[:model] || Event::UNKNOWN_MODEL,
169
212
  token_usage: snapshot[:explicit_usage],
170
213
  stream: true,
171
214
  usage_source: :manual,
@@ -175,9 +218,9 @@ module LlmCostTracker
175
218
  end
176
219
 
177
220
  def build_unknown_usage(snapshot)
178
- UsageCapture.build(
221
+ Event.build(
179
222
  provider: @provider,
180
- model: snapshot[:model] || UsageCapture::UNKNOWN_MODEL,
223
+ model: snapshot[:model] || Event::UNKNOWN_MODEL,
181
224
  token_usage: TokenUsage.build(input_tokens: 0, output_tokens: 0, total_tokens: 0),
182
225
  stream: true,
183
226
  usage_source: :unknown,
@@ -186,18 +229,54 @@ module LlmCostTracker
186
229
  )
187
230
  end
188
231
 
232
+ IGNORED_PAYLOAD_KEYS = %w[b64_json partial_image_b64].freeze
233
+ private_constant :IGNORED_PAYLOAD_KEYS
234
+
235
+ HEAVY_STRING_BYTES = 8 * 1024
236
+ private_constant :HEAVY_STRING_BYTES
237
+
189
238
  def capture_event(data, type:)
190
- event = { event: type, data: data }
191
- size = JSON.generate(event).bytesize
239
+ event = { event: type, data: strip_heavy_payload(data) }
240
+ size = approximate_bytesize(event)
192
241
  if @captured_bytes + size <= Capture::Stream::LIMIT_BYTES
193
- @events << event.deep_dup
242
+ @events << event
194
243
  @captured_bytes += size
195
244
  else
196
245
  @overflowed = true
197
246
  end
198
- rescue JSON::JSONError, TypeError
247
+ rescue TypeError, SystemStackError
199
248
  @overflowed = true
200
249
  end
250
+
251
+ def strip_heavy_payload(value)
252
+ case value
253
+ when Hash
254
+ value.each_with_object({}) do |(key, nested), out|
255
+ next if IGNORED_PAYLOAD_KEYS.include?(key.to_s)
256
+
257
+ out[key] = strip_heavy_payload(nested)
258
+ end
259
+ when Array
260
+ value.map { |nested| strip_heavy_payload(nested) }
261
+ when String
262
+ value.bytesize > HEAVY_STRING_BYTES ? "" : value
263
+ else
264
+ value
265
+ end
266
+ end
267
+
268
+ def approximate_bytesize(value)
269
+ case value
270
+ when Hash
271
+ value.sum { |key, nested| approximate_bytesize(key) + approximate_bytesize(nested) + 4 }
272
+ when Array
273
+ value.sum { |nested| approximate_bytesize(nested) + 2 }
274
+ when Numeric, true, false, nil
275
+ 8
276
+ else
277
+ value.to_s.bytesize + 2
278
+ end
279
+ end
201
280
  end
202
281
  end
203
282
  end