llm_cost_tracker 0.7.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. checksums.yaml +4 -4
  2. data/.ruby-version +1 -0
  3. data/CHANGELOG.md +173 -0
  4. data/README.md +60 -220
  5. data/app/assets/llm_cost_tracker/application.css +282 -45
  6. data/app/controllers/llm_cost_tracker/application_controller.rb +25 -20
  7. data/app/controllers/llm_cost_tracker/assets_controller.rb +11 -1
  8. data/app/controllers/llm_cost_tracker/calls_controller.rb +22 -19
  9. data/app/controllers/llm_cost_tracker/data_quality_controller.rb +14 -2
  10. data/app/controllers/llm_cost_tracker/reconciliation_controller.rb +106 -0
  11. data/app/controllers/llm_cost_tracker/tags_controller.rb +15 -1
  12. data/app/helpers/llm_cost_tracker/application_helper.rb +18 -21
  13. data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +3 -21
  14. data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +4 -4
  15. data/app/helpers/llm_cost_tracker/dashboard_query_helper.rb +1 -1
  16. data/app/helpers/llm_cost_tracker/inline_style_helper.rb +28 -0
  17. data/app/helpers/llm_cost_tracker/reconciliation_helper.rb +13 -0
  18. data/app/helpers/llm_cost_tracker/token_usage_helper.rb +24 -7
  19. data/app/models/llm_cost_tracker/call.rb +166 -0
  20. data/app/models/llm_cost_tracker/call_line_item.rb +18 -0
  21. data/app/models/llm_cost_tracker/call_rollup.rb +6 -0
  22. data/app/models/llm_cost_tracker/call_tag.rb +12 -0
  23. data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +9 -0
  24. data/app/models/llm_cost_tracker/ingestion/lease.rb +0 -3
  25. data/app/models/llm_cost_tracker/provider_invoice.rb +13 -0
  26. data/app/models/llm_cost_tracker/provider_invoice_import.rb +24 -0
  27. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +152 -32
  28. data/app/services/llm_cost_tracker/dashboard/date_range.rb +1 -1
  29. data/app/services/llm_cost_tracker/dashboard/filter.rb +8 -6
  30. data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +74 -21
  31. data/app/services/llm_cost_tracker/dashboard/pagination.rb +6 -4
  32. data/app/services/llm_cost_tracker/dashboard/params.rb +8 -2
  33. data/app/services/llm_cost_tracker/dashboard/provider_breakdown.rb +1 -1
  34. data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +4 -3
  35. data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +42 -9
  36. data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +14 -37
  37. data/app/services/llm_cost_tracker/dashboard/time_series.rb +1 -1
  38. data/app/services/llm_cost_tracker/dashboard/top_models.rb +1 -1
  39. data/app/views/layouts/llm_cost_tracker/application.html.erb +6 -1
  40. data/app/views/llm_cost_tracker/calls/index.html.erb +33 -75
  41. data/app/views/llm_cost_tracker/calls/show.html.erb +73 -33
  42. data/app/views/llm_cost_tracker/dashboard/index.html.erb +16 -57
  43. data/app/views/llm_cost_tracker/data_quality/index.html.erb +183 -167
  44. data/app/views/llm_cost_tracker/errors/database.html.erb +1 -1
  45. data/app/views/llm_cost_tracker/models/index.html.erb +18 -50
  46. data/app/views/llm_cost_tracker/reconciliation/index.html.erb +183 -0
  47. data/app/views/llm_cost_tracker/shared/_bar.html.erb +1 -1
  48. data/app/views/llm_cost_tracker/shared/_filters.html.erb +66 -0
  49. data/app/views/llm_cost_tracker/shared/_metric_stack.html.erb +1 -1
  50. data/app/views/llm_cost_tracker/shared/_sort.html.erb +13 -0
  51. data/app/views/llm_cost_tracker/shared/setup_required.html.erb +1 -1
  52. data/app/views/llm_cost_tracker/tags/index.html.erb +3 -34
  53. data/app/views/llm_cost_tracker/tags/show.html.erb +64 -36
  54. data/config/routes.rb +3 -2
  55. data/lib/llm_cost_tracker/billing/components.rb +95 -0
  56. data/lib/llm_cost_tracker/billing/components.yml +188 -0
  57. data/lib/llm_cost_tracker/billing/cost_status.rb +45 -0
  58. data/lib/llm_cost_tracker/billing/line_item.rb +189 -0
  59. data/lib/llm_cost_tracker/budget.rb +26 -36
  60. data/lib/llm_cost_tracker/capture/stream_collector.rb +125 -38
  61. data/lib/llm_cost_tracker/capture/stream_tracker.rb +40 -5
  62. data/lib/llm_cost_tracker/configuration.rb +86 -17
  63. data/lib/llm_cost_tracker/dashboard_setup_state.rb +109 -0
  64. data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +56 -0
  65. data/lib/llm_cost_tracker/doctor/ingestion_check.rb +48 -30
  66. data/lib/llm_cost_tracker/doctor/invoice_reconciliation_check.rb +164 -0
  67. data/lib/llm_cost_tracker/doctor/legacy_audit_check.rb +36 -0
  68. data/lib/llm_cost_tracker/doctor/legacy_billing_status_check.rb +22 -0
  69. data/lib/llm_cost_tracker/doctor/price_check.rb +2 -2
  70. data/lib/llm_cost_tracker/doctor/pricing_snapshot_drift_check.rb +85 -0
  71. data/lib/llm_cost_tracker/doctor/probe.rb +17 -0
  72. data/lib/llm_cost_tracker/doctor/schema_check.rb +34 -0
  73. data/lib/llm_cost_tracker/doctor.rb +111 -44
  74. data/lib/llm_cost_tracker/engine.rb +9 -0
  75. data/lib/llm_cost_tracker/errors.rb +5 -19
  76. data/lib/llm_cost_tracker/event.rb +11 -3
  77. data/lib/llm_cost_tracker/generators/llm_cost_tracker/call_rollups_generator.rb +43 -0
  78. data/lib/llm_cost_tracker/generators/llm_cost_tracker/durable_ingestion_generator.rb +43 -0
  79. data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +17 -5
  80. data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +2 -6
  81. data/lib/llm_cost_tracker/generators/llm_cost_tracker/reconciliation_generator.rb +34 -0
  82. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_call_rollups.rb.erb +15 -0
  83. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +104 -0
  84. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_durable_ingestion.rb.erb +29 -0
  85. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_reconciliation.rb.erb +55 -0
  86. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +28 -25
  87. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_rollups_provider.rb.erb +20 -0
  88. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_tags_key_value_index.rb.erb +32 -0
  89. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_image_tokens.rb.erb +18 -0
  90. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_rollups_provider_generator.rb +38 -0
  91. data/lib/llm_cost_tracker/generators/llm_cost_tracker/{add_provider_response_id_generator.rb → upgrade_call_tags_key_value_index_generator.rb} +5 -4
  92. data/lib/llm_cost_tracker/generators/llm_cost_tracker/{add_streaming_generator.rb → upgrade_image_tokens_generator.rb} +4 -4
  93. data/lib/llm_cost_tracker/ingestion/batch.rb +11 -12
  94. data/lib/llm_cost_tracker/ingestion/inbox.rb +39 -24
  95. data/lib/llm_cost_tracker/ingestion/inline.rb +22 -0
  96. data/lib/llm_cost_tracker/ingestion/worker.rb +24 -7
  97. data/lib/llm_cost_tracker/ingestion.rb +66 -22
  98. data/lib/llm_cost_tracker/integrations/anthropic.rb +68 -42
  99. data/lib/llm_cost_tracker/integrations/base.rb +56 -32
  100. data/lib/llm_cost_tracker/integrations/openai.rb +342 -63
  101. data/lib/llm_cost_tracker/integrations/ruby_llm.rb +110 -11
  102. data/lib/llm_cost_tracker/integrations.rb +21 -3
  103. data/lib/llm_cost_tracker/ledger/period/totals.rb +30 -11
  104. data/lib/llm_cost_tracker/ledger/period.rb +5 -5
  105. data/lib/llm_cost_tracker/ledger/rollups/upsert_sql.rb +2 -2
  106. data/lib/llm_cost_tracker/ledger/rollups.rb +90 -25
  107. data/lib/llm_cost_tracker/ledger/schema/adapter.rb +18 -0
  108. data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +79 -0
  109. data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +37 -0
  110. data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +41 -0
  111. data/lib/llm_cost_tracker/ledger/schema/calls.rb +36 -23
  112. data/lib/llm_cost_tracker/ledger/schema/ingestion_inbox_entries.rb +47 -0
  113. data/lib/llm_cost_tracker/ledger/schema/ingestion_leases.rb +42 -0
  114. data/lib/llm_cost_tracker/ledger/schema/provider_invoice_imports.rb +46 -0
  115. data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +57 -0
  116. data/lib/llm_cost_tracker/ledger/store.rb +103 -20
  117. data/lib/llm_cost_tracker/ledger/tags/encoding.rb +37 -0
  118. data/lib/llm_cost_tracker/ledger/tags/query.rb +6 -11
  119. data/lib/llm_cost_tracker/ledger/tags/sql.rb +27 -15
  120. data/lib/llm_cost_tracker/ledger.rb +5 -2
  121. data/lib/llm_cost_tracker/logging.rb +2 -5
  122. data/lib/llm_cost_tracker/masking.rb +39 -0
  123. data/lib/llm_cost_tracker/middleware/faraday.rb +95 -35
  124. data/lib/llm_cost_tracker/parsers/anthropic.rb +74 -14
  125. data/lib/llm_cost_tracker/parsers/base.rb +13 -4
  126. data/lib/llm_cost_tracker/parsers/gemini.rb +105 -15
  127. data/lib/llm_cost_tracker/parsers/openai.rb +16 -2
  128. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +15 -3
  129. data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +126 -0
  130. data/lib/llm_cost_tracker/parsers/openai_usage.rb +157 -59
  131. data/lib/llm_cost_tracker/parsers/sse.rb +1 -1
  132. data/lib/llm_cost_tracker/parsers.rb +1 -1
  133. data/lib/llm_cost_tracker/prices.json +198 -22
  134. data/lib/llm_cost_tracker/pricing/effective_prices.rb +28 -21
  135. data/lib/llm_cost_tracker/pricing/explainer.rb +4 -5
  136. data/lib/llm_cost_tracker/pricing/lookup.rb +73 -36
  137. data/lib/llm_cost_tracker/pricing/mode.rb +76 -0
  138. data/lib/llm_cost_tracker/pricing/registry.rb +67 -45
  139. data/lib/llm_cost_tracker/pricing/service_charges.rb +210 -0
  140. data/lib/llm_cost_tracker/pricing/sync/fetcher.rb +26 -17
  141. data/lib/llm_cost_tracker/pricing/sync/registry_diff.rb +6 -15
  142. data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +50 -1
  143. data/lib/llm_cost_tracker/pricing/sync.rb +59 -10
  144. data/lib/llm_cost_tracker/pricing/sync_change_printer.rb +32 -0
  145. data/lib/llm_cost_tracker/pricing.rb +220 -28
  146. data/lib/llm_cost_tracker/railtie.rb +6 -8
  147. data/lib/llm_cost_tracker/reconcile_tasks.rb +134 -0
  148. data/lib/llm_cost_tracker/reconciliation/diff.rb +428 -0
  149. data/lib/llm_cost_tracker/reconciliation/diff_result.rb +48 -0
  150. data/lib/llm_cost_tracker/reconciliation/import_result.rb +19 -0
  151. data/lib/llm_cost_tracker/reconciliation/importer.rb +253 -0
  152. data/lib/llm_cost_tracker/reconciliation/sources/anthropic_usage.rb +171 -0
  153. data/lib/llm_cost_tracker/reconciliation/sources/fingerprint.rb +20 -0
  154. data/lib/llm_cost_tracker/reconciliation/sources/openai_usage.rb +142 -0
  155. data/lib/llm_cost_tracker/reconciliation.rb +118 -0
  156. data/lib/llm_cost_tracker/report/data.rb +19 -8
  157. data/lib/llm_cost_tracker/report.rb +0 -4
  158. data/lib/llm_cost_tracker/retention.rb +22 -9
  159. data/lib/llm_cost_tracker/tags/context.rb +2 -5
  160. data/lib/llm_cost_tracker/tags/key.rb +4 -0
  161. data/lib/llm_cost_tracker/tags/sanitizer.rb +71 -20
  162. data/lib/llm_cost_tracker/timing.rb +15 -0
  163. data/lib/llm_cost_tracker/token_usage.rb +64 -42
  164. data/lib/llm_cost_tracker/tracker.rb +97 -27
  165. data/lib/llm_cost_tracker/usage_capture.rb +29 -8
  166. data/lib/llm_cost_tracker/version.rb +1 -1
  167. data/lib/llm_cost_tracker.rb +45 -35
  168. data/lib/tasks/llm_cost_tracker.rake +45 -17
  169. metadata +71 -41
  170. data/app/models/llm_cost_tracker/ingestion/event.rb +0 -13
  171. data/app/models/llm_cost_tracker/ledger/call.rb +0 -45
  172. data/app/models/llm_cost_tracker/ledger/call_metrics.rb +0 -66
  173. data/app/models/llm_cost_tracker/ledger/period/grouping.rb +0 -71
  174. data/app/models/llm_cost_tracker/ledger/period/total.rb +0 -13
  175. data/app/models/llm_cost_tracker/ledger/tags/accessors.rb +0 -19
  176. data/lib/llm_cost_tracker/configuration/instrumentation.rb +0 -33
  177. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_ingestion_generator.rb +0 -29
  178. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +0 -29
  179. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_period_totals_generator.rb +0 -29
  180. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_token_usage_generator.rb +0 -42
  181. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_ingestion_to_llm_cost_tracker.rb.erb +0 -33
  182. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +0 -9
  183. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +0 -104
  184. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_provider_response_id_to_llm_api_calls.rb.erb +0 -15
  185. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_streaming_to_llm_api_calls.rb.erb +0 -21
  186. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_token_usage_to_llm_api_calls.rb.erb +0 -22
  187. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +0 -83
  188. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +0 -26
  189. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +0 -44
  190. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +0 -29
  191. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +0 -29
  192. data/lib/llm_cost_tracker/ledger/rollups/batch.rb +0 -43
  193. data/lib/llm_cost_tracker/ledger/schema/period_totals.rb +0 -32
  194. data/lib/llm_cost_tracker/pricing/components.rb +0 -37
  195. data/lib/llm_cost_tracker/pricing/sync/registry_loader.rb +0 -63
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "active_support/core_ext/integer/time"
4
4
 
5
+ require_relative "../billing/cost_status"
5
6
  require_relative "../ledger"
6
7
 
7
8
  module LlmCostTracker
@@ -32,17 +33,18 @@ module LlmCostTracker
32
33
  breakdown_limit = nil unless breakdown_limit.positive?
33
34
  end
34
35
  from = now - days.days
35
- scope = Ledger::Call.where(tracked_at: from..now)
36
+ scope = LlmCostTracker::Call.where(tracked_at: from..now)
36
37
  tag_breakdowns ||= LlmCostTracker.configuration.report_tag_breakdowns || []
38
+ aggregate = totals(scope)
37
39
 
38
40
  new(
39
41
  days: days,
40
42
  from_time: from,
41
43
  to_time: now,
42
- total_cost: scope.sum(:total_cost).to_f,
43
- requests_count: scope.count,
44
- average_latency_ms: average_latency_ms(scope),
45
- unknown_pricing_count: scope.where(total_cost: nil).count,
44
+ total_cost: aggregate.total_cost.to_f,
45
+ requests_count: aggregate.requests_count.to_i,
46
+ average_latency_ms: aggregate.average_latency_ms&.to_f,
47
+ unknown_pricing_count: aggregate.unknown_pricing_count.to_i,
46
48
  cost_by_provider: scope.cost_by_provider(limit: breakdown_limit).to_a,
47
49
  cost_by_model: scope.cost_by_model(limit: breakdown_limit).to_a,
48
50
  cost_by_tags: cost_by_tags(scope, tag_breakdowns, limit: breakdown_limit),
@@ -50,8 +52,17 @@ module LlmCostTracker
50
52
  )
51
53
  end
52
54
 
53
- def self.average_latency_ms(scope)
54
- scope.average(:latency_ms)&.to_f
55
+ def self.totals(scope)
56
+ scope
57
+ .select(
58
+ "COALESCE(SUM(total_cost), 0) AS total_cost, " \
59
+ "COUNT(*) AS requests_count, " \
60
+ "AVG(latency_ms) AS average_latency_ms, " \
61
+ "COALESCE(SUM(CASE WHEN total_cost IS NULL " \
62
+ "OR cost_status IN ('#{Billing::CostStatus::UNKNOWN}', '#{Billing::CostStatus::PARTIAL}') " \
63
+ "THEN 1 ELSE 0 END), 0) AS unknown_pricing_count"
64
+ )
65
+ .take
55
66
  end
56
67
 
57
68
  def self.cost_by_tags(scope, keys, limit:)
@@ -66,7 +77,7 @@ module LlmCostTracker
66
77
  .to_a
67
78
  end
68
79
 
69
- private_class_method :average_latency_ms, :cost_by_tags, :top_calls
80
+ private_class_method :cost_by_tags, :top_calls, :totals
70
81
  end
71
82
  end
72
83
  end
@@ -20,10 +20,6 @@ module LlmCostTracker
20
20
  rescue StandardError => e
21
21
  "Unable to build LLM cost report: #{e.class}: #{e.message}"
22
22
  end
23
-
24
- def data(days: Data::DEFAULT_DAYS, now: Time.now.utc, tag_breakdowns: nil)
25
- Data.build(days: days, now: now, tag_breakdowns: tag_breakdowns)
26
- end
27
23
  end
28
24
  end
29
25
  end
@@ -21,6 +21,17 @@ module LlmCostTracker
21
21
  deleted
22
22
  end
23
23
 
24
+ def prune_invoice_imports(older_than:, now: Time.now.utc)
25
+ cutoff = resolve_cutoff(older_than, now)
26
+ require_relative "ledger"
27
+ return 0 unless LlmCostTracker::ProviderInvoiceImport.table_exists?
28
+
29
+ LlmCostTracker::ProviderInvoiceImport
30
+ .where(state: %w[completed failed])
31
+ .where(finished_at: ...cutoff)
32
+ .delete_all
33
+ end
34
+
24
35
  private
25
36
 
26
37
  def resolve_cutoff(older_than, now)
@@ -49,20 +60,22 @@ module LlmCostTracker
49
60
  end
50
61
 
51
62
  def prune_batch(cutoff, batch_size)
52
- LlmCostTracker::Ledger::Call.transaction do
53
- rows = LlmCostTracker::Ledger::Call
54
- .where(tracked_at: ...cutoff)
55
- .order(:id)
56
- .limit(batch_size)
57
- .lock
58
- .pluck(:id, :tracked_at, :total_cost)
63
+ LlmCostTracker::Call.transaction do
64
+ rows = pluck_prunable(cutoff, batch_size)
59
65
  next 0 if rows.empty?
60
66
 
61
- deleted = LlmCostTracker::Ledger::Call.where(id: rows.map(&:first)).delete_all
62
- LlmCostTracker::Ledger::Rollups.decrement!(rows) if deleted.positive?
67
+ deleted = LlmCostTracker::Call.where(id: rows.map(&:first)).delete_all
68
+ if deleted.positive? && LlmCostTracker.configuration.cache_rollups
69
+ LlmCostTracker::Ledger::Rollups.decrement!(rows)
70
+ end
63
71
  deleted
64
72
  end
65
73
  end
74
+
75
+ def pluck_prunable(cutoff, batch_size)
76
+ LlmCostTracker::Call.where(tracked_at: ...cutoff).order(:id).limit(batch_size).lock
77
+ .pluck(:id, :tracked_at, :total_cost, :pricing_snapshot, :provider)
78
+ end
66
79
  end
67
80
  end
68
81
  end
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "active_support/core_ext/object/deep_dup"
4
3
  require "active_support/isolated_execution_state"
5
4
 
6
5
  module LlmCostTracker
@@ -11,7 +10,7 @@ module LlmCostTracker
11
10
  class << self
12
11
  def with(tags)
13
12
  stack = ActiveSupport::IsolatedExecutionState[KEY] || []
14
- ActiveSupport::IsolatedExecutionState[KEY] = stack + [(tags || {}).deep_dup.to_h]
13
+ ActiveSupport::IsolatedExecutionState[KEY] = stack + [Sanitizer.call((tags || {}).to_h)]
15
14
  yield
16
15
  ensure
17
16
  ActiveSupport::IsolatedExecutionState[KEY] = stack
@@ -21,9 +20,7 @@ module LlmCostTracker
21
20
  default_tags = LlmCostTracker.configuration.default_tags
22
21
  default_tags = default_tags.call if default_tags.respond_to?(:call)
23
22
 
24
- (default_tags || {}).deep_dup.to_h.merge(
25
- (ActiveSupport::IsolatedExecutionState[KEY] || []).reduce({}) { |merged, tags| merged.merge(tags) }
26
- )
23
+ Sanitizer.call(default_tags.to_h).merge(*Array(ActiveSupport::IsolatedExecutionState[KEY]))
27
24
  end
28
25
 
29
26
  def clear!
@@ -4,10 +4,14 @@ module LlmCostTracker
4
4
  module Tags
5
5
  module Key
6
6
  PATTERN = /\A[\w.-]+\z/
7
+ MAX_BYTESIZE = 64
7
8
 
8
9
  class << self
9
10
  def validate!(key, error_class: ArgumentError)
10
11
  key = key.to_s
12
+ if key.bytesize > MAX_BYTESIZE
13
+ raise error_class, "tag key exceeds #{MAX_BYTESIZE} bytes: #{key[0, 16].inspect}..."
14
+ end
11
15
  return key if key.match?(PATTERN)
12
16
 
13
17
  raise error_class, "invalid tag key: #{key.inspect}"
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "active_support/core_ext/string/inflections"
3
4
  require "json"
4
5
 
5
6
  module LlmCostTracker
@@ -7,42 +8,92 @@ module LlmCostTracker
7
8
  module Sanitizer
8
9
  REDACTED_VALUE = "[REDACTED]"
9
10
 
11
+ SECRET_VALUE_PATTERNS = [
12
+ /\Ask-(?:ant-|admin-|proj-|svcacct-|live-|test-)?[A-Za-z0-9_-]{16,}\z/,
13
+ /\AAKIA[0-9A-Z]{16}\z/,
14
+ /\Agh[opsur]_[A-Za-z0-9]{16,}\z/,
15
+ /\Agithub_pat_[A-Za-z0-9_]{20,}\z/,
16
+ /\Aeyj[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\z/i,
17
+ /\Abearer\s+[A-Za-z0-9_.-]{20,}\z/i,
18
+ /\Axox[abprs]-[A-Za-z0-9-]{10,}\z/,
19
+ /\A(?:sk|rk|pk)_(?:live|test)_[A-Za-z0-9]{20,}\z/,
20
+ /\AAIza[0-9A-Za-z_-]{35}\z/
21
+ ].freeze
22
+ private_constant :SECRET_VALUE_PATTERNS
23
+
10
24
  class << self
11
25
  def call(tags, config: LlmCostTracker.configuration)
12
26
  tags = (tags || {}).to_h
13
- tags.first([config.max_tag_count.to_i, 0].max).each_with_object({}) do |(key, value), sanitized|
14
- sanitized[key] = sanitized_value(key, value, config)
27
+ redacted = Array(config.redacted_tag_keys).map { |key| normalized_key(key) }
28
+ limit = [config.max_tag_value_bytesize.to_i, 0].max
29
+ max_count = [config.max_tag_count.to_i, 0].max
30
+ tags.to_a.last(max_count).each_with_object({}) do |(key, value), sanitized|
31
+ sanitized[key] = sanitized_value(key, value, redacted, limit)
15
32
  end
16
33
  end
17
34
 
18
35
  private
19
36
 
20
- def sanitized_value(key, value, config)
21
- return REDACTED_VALUE if redacted_key?(key, config)
37
+ def sanitized_value(key, value, redacted, limit)
38
+ return REDACTED_VALUE if redacted_key?(key, redacted)
22
39
 
23
- string = value_string(value)
24
- limit = [config.max_tag_value_bytesize.to_i, 0].max
25
- return value if string.bytesize <= limit
40
+ scrubbed = scrub_secrets(value)
41
+ return REDACTED_VALUE if scrubbed.equal?(REDACTED_SENTINEL)
26
42
 
27
- string.byteslice(0, limit).to_s.encode("UTF-8", invalid: :replace, undef: :replace)
43
+ scalar_truncate(scrubbed, limit)
28
44
  end
29
45
 
30
- def redacted_key?(key, config)
31
- normalized = normalized_key(key)
32
- Array(config.redacted_tag_keys).map { |redacted_key| normalized_key(redacted_key) }.any? do |candidate|
33
- redacted_key_component?(normalized, candidate)
46
+ REDACTED_SENTINEL = Object.new.freeze
47
+ private_constant :REDACTED_SENTINEL
48
+
49
+ def scalar_truncate(value, limit)
50
+ case value
51
+ when Hash
52
+ value.transform_values { |nested| scalar_truncate(nested, limit) }
53
+ when Array
54
+ value.map { |nested| scalar_truncate(nested, limit) }
55
+ else
56
+ return value if value == REDACTED_VALUE
57
+
58
+ string = value.to_s
59
+ return value if string.bytesize <= limit
60
+
61
+ string.byteslice(0, limit).encode("UTF-8", invalid: :replace, undef: :replace)
34
62
  end
35
63
  end
36
64
 
65
+ def scrub_secrets(value)
66
+ case value
67
+ when Hash
68
+ value.each_with_object({}) do |(key, nested), out|
69
+ scrubbed = scrub_secrets(nested)
70
+ out[key] = scrubbed.equal?(REDACTED_SENTINEL) ? REDACTED_VALUE : scrubbed
71
+ end
72
+ when Array
73
+ value.map do |nested|
74
+ scrubbed = scrub_secrets(nested)
75
+ scrubbed.equal?(REDACTED_SENTINEL) ? REDACTED_VALUE : scrubbed
76
+ end
77
+ else
78
+ secret_shaped?(value.to_s) ? REDACTED_SENTINEL : value
79
+ end
80
+ end
81
+
82
+ def secret_shaped?(string)
83
+ return false if string.bytesize < 16
84
+
85
+ SECRET_VALUE_PATTERNS.any? { |pattern| pattern.match?(string) }
86
+ end
87
+
88
+ def redacted_key?(key, redacted)
89
+ return false if redacted.empty?
90
+
91
+ normalized = normalized_key(key)
92
+ redacted.any? { |candidate| redacted_key_component?(normalized, candidate) }
93
+ end
94
+
37
95
  def normalized_key(key)
38
- key.to_s
39
- .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
40
- .gsub(/([a-z\d])([A-Z])/, '\1_\2')
41
- .downcase
42
- .gsub(/[^a-z0-9]+/, "_")
43
- .gsub(/_+/, "_")
44
- .delete_prefix("_")
45
- .delete_suffix("_")
96
+ key.to_s.underscore.gsub(/[^a-z0-9]+/, "_").delete_prefix("_").delete_suffix("_")
46
97
  end
47
98
 
48
99
  def redacted_key_component?(key, candidate)
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LlmCostTracker
4
+ module Timing
5
+ module_function
6
+
7
+ def now_monotonic
8
+ Process.clock_gettime(Process::CLOCK_MONOTONIC)
9
+ end
10
+
11
+ def elapsed_ms(started_at)
12
+ ((now_monotonic - started_at) * 1000).round
13
+ end
14
+ end
15
+ end
@@ -1,67 +1,89 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "active_support/core_ext/hash/keys"
3
+ require_relative "billing/components"
4
+ require_relative "logging"
4
5
 
5
6
  module LlmCostTracker
7
+ KNOWN_TOKEN_KEYS = (
8
+ Billing::Components::TOKEN_PRICED.map(&:key) + %i[total hidden_output]
9
+ ).freeze
10
+
6
11
  TokenUsage = Data.define(
7
12
  :input_tokens,
8
13
  :cache_read_input_tokens,
9
14
  :cache_write_input_tokens,
10
- :cache_write_1h_input_tokens,
15
+ :cache_write_extended_input_tokens,
16
+ :audio_input_tokens,
17
+ :image_input_tokens,
11
18
  :output_tokens,
19
+ :audio_output_tokens,
20
+ :image_output_tokens,
12
21
  :total_tokens,
13
22
  :hidden_output_tokens
14
23
  ) do
15
- def self.build(input_tokens:, output_tokens:, cache_read_input_tokens: 0,
16
- cache_write_input_tokens: 0, cache_write_1h_input_tokens: 0,
17
- total_tokens: nil, hidden_output_tokens: 0)
18
- input = input_tokens.to_i
19
- output = output_tokens.to_i
20
- cache_read = cache_read_input_tokens.to_i
21
- cache_write = cache_write_input_tokens.to_i
22
- cache_write_1h = cache_write_1h_input_tokens.to_i
23
- calculated_total = input + cache_read + cache_write + cache_write_1h + output
24
- total = total_tokens.nil? ? calculated_total : [total_tokens.to_i, calculated_total].max
24
+ def self.build_from_tokens(tokens)
25
+ return tokens if tokens.is_a?(self)
26
+ raise ArgumentError, "tokens must be a Hash, got #{tokens.class}" unless tokens.respond_to?(:to_h)
25
27
 
26
- new(
27
- input_tokens: input,
28
- cache_read_input_tokens: cache_read,
29
- cache_write_input_tokens: cache_write,
30
- cache_write_1h_input_tokens: cache_write_1h,
31
- output_tokens: output,
32
- total_tokens: total,
33
- hidden_output_tokens: hidden_output_tokens.to_i
34
- )
35
- end
28
+ values = tokens.to_h.transform_keys { |key| key.to_s.to_sym }
29
+ warn_on_unknown_keys(values)
30
+ token_attributes = Billing::Components::TOKEN_PRICED.to_h do |component|
31
+ [component.token_key, values.fetch(component.key, 0)]
32
+ end
36
33
 
37
- def self.from_hash(attributes)
38
- attributes = attributes.to_h.symbolize_keys
39
- values = TokenUsage::COMPONENT_TOKEN_KEYS.to_h { |key| [key, attributes[key]] }
40
34
  build(
41
- **values,
42
- total_tokens: attributes[:total_tokens]
35
+ **token_attributes,
36
+ total_tokens: values[:total],
37
+ hidden_output_tokens: values.fetch(:hidden_output, 0)
43
38
  )
44
39
  end
45
40
 
46
- def price_quantities
47
- {
48
- input: input_tokens,
49
- cache_read_input: cache_read_input_tokens,
50
- cache_write_input: cache_write_input_tokens,
51
- cache_write_1h_input: cache_write_1h_input_tokens,
52
- output: output_tokens
53
- }
41
+ def self.warn_on_unknown_keys(values)
42
+ return if values.empty?
43
+ return if values.keys.intersect?(KNOWN_TOKEN_KEYS)
44
+
45
+ Logging.warn(
46
+ "tokens hash contains no recognized keys (#{values.keys.inspect}); " \
47
+ "expected one of #{KNOWN_TOKEN_KEYS.inspect}. Did you pass a raw provider response?"
48
+ )
54
49
  end
55
50
 
56
- def stored_attributes
57
- to_h.slice(*self.class::STORED_KEYS)
51
+ def self.non_negative_int(value)
52
+ [value.to_i, 0].max
58
53
  end
59
54
 
60
- def to_h
61
- super.compact
55
+ def self.build(input_tokens:, output_tokens:, cache_read_input_tokens: 0,
56
+ cache_write_input_tokens: 0, cache_write_extended_input_tokens: 0,
57
+ audio_input_tokens: 0, audio_output_tokens: 0,
58
+ image_input_tokens: 0, image_output_tokens: 0,
59
+ total_tokens: nil, hidden_output_tokens: 0)
60
+ input = non_negative_int(input_tokens)
61
+ output = non_negative_int(output_tokens)
62
+ cache_read = non_negative_int(cache_read_input_tokens)
63
+ cache_write = non_negative_int(cache_write_input_tokens)
64
+ cache_write_extended = non_negative_int(cache_write_extended_input_tokens)
65
+ audio_input = non_negative_int(audio_input_tokens)
66
+ audio_output = non_negative_int(audio_output_tokens)
67
+ image_input = non_negative_int(image_input_tokens)
68
+ image_output = non_negative_int(image_output_tokens)
69
+ hidden_output = non_negative_int(hidden_output_tokens)
70
+ calculated_total = input + cache_read + cache_write + cache_write_extended +
71
+ audio_input + image_input + output + audio_output + image_output
72
+ total = total_tokens ? [non_negative_int(total_tokens), calculated_total].max : calculated_total
73
+
74
+ new(
75
+ input_tokens: input,
76
+ cache_read_input_tokens: cache_read,
77
+ cache_write_input_tokens: cache_write,
78
+ cache_write_extended_input_tokens: cache_write_extended,
79
+ audio_input_tokens: audio_input,
80
+ image_input_tokens: image_input,
81
+ output_tokens: output,
82
+ audio_output_tokens: audio_output,
83
+ image_output_tokens: image_output,
84
+ total_tokens: total,
85
+ hidden_output_tokens: hidden_output
86
+ )
62
87
  end
63
88
  end
64
-
65
- TokenUsage::STORED_KEYS = TokenUsage.members.freeze
66
- TokenUsage::COMPONENT_TOKEN_KEYS = (TokenUsage.members - %i[total_tokens]).freeze
67
89
  end
@@ -1,19 +1,19 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "active_support/core_ext/object/blank"
4
+ require "bigdecimal"
4
5
  require "securerandom"
5
6
 
6
7
  require_relative "ingestion"
7
8
  require_relative "ledger"
9
+ require_relative "logging"
8
10
  require_relative "pricing"
11
+ require_relative "billing/cost_status"
9
12
 
10
13
  module LlmCostTracker
11
14
  class Tracker
12
15
  EVENT_NAME = "llm_request.llm_cost_tracker"
13
16
 
14
- USAGE_SOURCES = %i[response stream_final sdk_response ruby_llm manual unknown].freeze
15
- TRACKING_METADATA_KEYS = (TokenUsage.members.map(&:to_s) + %w[pricing_mode provider_response_id]).freeze
16
-
17
17
  class << self
18
18
  def enforce_budget!
19
19
  return unless LlmCostTracker.configuration.enabled
@@ -25,43 +25,73 @@ module LlmCostTracker
25
25
  return unless LlmCostTracker.configuration.enabled
26
26
 
27
27
  pricing_mode = Pricing.normalize_mode(pricing_mode) || capture.pricing_mode
28
- cost_data = Pricing.cost_for(
28
+ cost_data, pricing_snapshot, priced_line_items = Pricing.calculate(
29
29
  provider: capture.provider,
30
30
  model: capture.model,
31
- token_usage: capture.token_usage,
31
+ tokens: capture.token_usage,
32
+ line_items: capture.line_items,
32
33
  pricing_mode: pricing_mode
33
34
  )
34
35
 
35
- Pricing::Unknown.handle!(capture.model) unless cost_data
36
+ Pricing::Unknown.handle!(capture.model) if cost_data.nil? && capture.token_usage.total_tokens.positive?
36
37
 
37
38
  event = build_event(
38
39
  capture: capture,
39
40
  pricing_mode: pricing_mode,
40
41
  cost_data: cost_data,
42
+ pricing_snapshot: pricing_snapshot,
43
+ line_items: priced_line_items,
41
44
  metadata: metadata,
42
45
  latency_ms: latency_ms,
43
46
  context_tags: context_tags
44
47
  )
45
48
 
46
- ActiveSupport::Notifications.instrument(EVENT_NAME, event.to_h)
47
-
48
- Ingestion::Inbox.save(event)
49
+ save_event(event)
50
+ yield :after_save if block_given?
51
+ notify_subscribers(event)
49
52
  Budget.check!(event)
50
53
 
51
54
  event
52
55
  end
53
56
 
57
+ def save_event(event)
58
+ if LlmCostTracker.configuration.durable_ingestion
59
+ Ingestion::Inbox.save(event)
60
+ else
61
+ Ingestion::Inline.save(event)
62
+ end
63
+ end
64
+
65
+ def notify_subscribers(event)
66
+ return unless ActiveSupport::Notifications.notifier.listening?(EVENT_NAME)
67
+
68
+ ActiveSupport::Notifications.instrument(EVENT_NAME, event.to_h)
69
+ rescue StandardError => e
70
+ Logging.warn("Subscriber raised on #{EVENT_NAME}: #{e.class}: #{e.message}")
71
+ end
72
+
54
73
  private
55
74
 
56
- def build_event(capture:, pricing_mode:, cost_data:, metadata:, latency_ms:, context_tags:)
57
- usage_source = if capture.usage_source.nil?
58
- nil
59
- else
60
- symbol = capture.usage_source.to_sym
61
- USAGE_SOURCES.include?(symbol) ? symbol.to_s : nil
62
- end
63
- tags = metadata.to_h.reject { |key, _value| TRACKING_METADATA_KEYS.include?(key.to_s) }
64
- context_tags = context_tags.nil? ? LlmCostTracker::Tags::Context.tags : context_tags.to_h
75
+ def token_pricing_partial?(token_usage:, cost_data:)
76
+ return false unless cost_data
77
+
78
+ Billing::Components::TOKEN_PRICED.any? do |component|
79
+ token_usage.public_send(component.token_key).positive? && cost_data[component.cost_key].nil?
80
+ end
81
+ end
82
+
83
+ def build_event(capture:, pricing_mode:, cost_data:, pricing_snapshot:, line_items:,
84
+ metadata:, latency_ms:, context_tags:)
85
+ context_tags = (context_tags || LlmCostTracker::Tags::Context.tags).to_h
86
+ cost = cost_with_service_lines(cost_data, line_items)
87
+ cost_status = Billing::CostStatus.call(
88
+ token_usage: capture.token_usage,
89
+ usage_source: capture.usage_source,
90
+ token_cost: cost_data,
91
+ token_pricing_partial: token_pricing_partial?(token_usage: capture.token_usage, cost_data: cost_data),
92
+ service_line_items: line_items.reject(&:token?),
93
+ total_cost: cost&.fetch(:total_cost, nil)
94
+ )
65
95
 
66
96
  Event.new(
67
97
  event_id: SecureRandom.uuid,
@@ -69,15 +99,55 @@ module LlmCostTracker
69
99
  model: capture.model,
70
100
  token_usage: capture.token_usage,
71
101
  pricing_mode: pricing_mode,
72
- cost: cost_data,
73
- tags: LlmCostTracker::Tags::Sanitizer.call(
74
- context_tags.merge(tags)
75
- ).freeze,
76
- latency_ms: latency_ms.nil? ? nil : [latency_ms.to_i, 0].max,
77
- stream: capture.stream ? true : false,
78
- usage_source: usage_source,
79
- provider_response_id: capture.provider_response_id.to_s.presence,
80
- tracked_at: Time.now.utc
102
+ cost: cost,
103
+ tags: LlmCostTracker::Tags::Sanitizer.call(context_tags.merge(metadata.to_h)).freeze,
104
+ latency_ms: finite_latency_ms(latency_ms),
105
+ stream: capture.stream,
106
+ usage_source: capture.usage_source,
107
+ provider_response_id: capture.provider_response_id,
108
+ provider_project_id: capture.provider_project_id,
109
+ provider_api_key_id: capture.provider_api_key_id,
110
+ provider_workspace_id: capture.provider_workspace_id,
111
+ batch: capture.batch,
112
+ tracked_at: Time.now.utc,
113
+ cost_status: cost_status,
114
+ pricing_snapshot: pricing_snapshot,
115
+ line_items: line_items
116
+ )
117
+ end
118
+
119
+ def finite_latency_ms(latency_ms)
120
+ return nil if latency_ms.nil?
121
+
122
+ Integer(latency_ms).clamp(0, (1 << 31) - 1)
123
+ rescue ArgumentError, TypeError, FloatDomainError
124
+ nil
125
+ end
126
+
127
+ def cost_with_service_lines(cost_data, line_items)
128
+ priced_services = line_items.reject(&:token?).select(&:priced?)
129
+ return cost_data if priced_services.empty?
130
+
131
+ base_currency = (cost_data && cost_data[:currency]) || Billing::LineItem::USD
132
+ matching, mismatched = priced_services.partition { |line| line.currency.to_s == base_currency.to_s }
133
+ warn_currency_mismatch(mismatched, base_currency) if mismatched.any?
134
+
135
+ cost = cost_data ? cost_data.dup : {}
136
+ cost[:currency] ||= base_currency.to_s
137
+ return cost if matching.empty?
138
+
139
+ service_total = matching.sum(BigDecimal("0"), &:cost_value)
140
+ base_total = BigDecimal(cost.fetch(:total_cost, 0).to_s)
141
+ cost[:total_cost] = (base_total + service_total).round(8)
142
+ cost
143
+ end
144
+
145
+ def warn_currency_mismatch(lines, base_currency)
146
+ currencies = lines.map { |line| line.currency.to_s }.uniq.sort
147
+ Logging.warn(
148
+ "Service line currency mismatch: header is #{base_currency}, dropping " \
149
+ "#{lines.size} priced line(s) in #{currencies.join(', ')} from header total. " \
150
+ "Per-line costs are still recorded; header total reflects #{base_currency} only."
81
151
  )
82
152
  end
83
153
  end
@@ -3,6 +3,7 @@
3
3
  require "active_support/core_ext/object/blank"
4
4
 
5
5
  require_relative "pricing"
6
+ require_relative "billing/line_item"
6
7
 
7
8
  module LlmCostTracker
8
9
  UsageCapture = Data.define(
@@ -12,26 +13,46 @@ module LlmCostTracker
12
13
  :stream,
13
14
  :usage_source,
14
15
  :provider_response_id,
15
- :pricing_mode
16
+ :provider_project_id,
17
+ :provider_api_key_id,
18
+ :provider_workspace_id,
19
+ :batch,
20
+ :pricing_mode,
21
+ :line_items
16
22
  )
17
23
 
18
24
  class UsageCapture
19
25
  UNKNOWN_MODEL = "unknown"
20
26
 
27
+ def self.batch_from_pricing_mode?(pricing_mode)
28
+ pricing_mode.to_s.split("_").include?("batch")
29
+ end
30
+
21
31
  def self.build(**attributes)
32
+ pricing_mode = Pricing.normalize_mode(attributes[:pricing_mode])
33
+ batch = attributes[:batch]
34
+ batch = batch_from_pricing_mode?(pricing_mode) if batch.nil?
35
+
36
+ token_usage = attributes.fetch(:token_usage)
37
+ service_line_items = Array(attributes[:service_line_items]).map do |item|
38
+ item.is_a?(Billing::LineItem) ? item : Billing::LineItem.build(item)
39
+ end
40
+ line_items = attributes[:line_items] || (Billing::LineItem.from_token_usage(token_usage) + service_line_items)
41
+
22
42
  new(
23
43
  provider: attributes.fetch(:provider).to_s,
24
44
  model: attributes.fetch(:model).to_s.strip.presence || UNKNOWN_MODEL,
25
- token_usage: attributes.fetch(:token_usage),
45
+ token_usage: token_usage,
26
46
  stream: attributes[:stream] || false,
27
47
  usage_source: attributes[:usage_source],
28
- provider_response_id: attributes[:provider_response_id],
29
- pricing_mode: Pricing.normalize_mode(attributes[:pricing_mode])
48
+ provider_response_id: attributes[:provider_response_id].to_s.strip.presence,
49
+ provider_project_id: attributes[:provider_project_id].to_s.strip.presence,
50
+ provider_api_key_id: attributes[:provider_api_key_id].to_s.strip.presence,
51
+ provider_workspace_id: attributes[:provider_workspace_id].to_s.strip.presence,
52
+ batch: batch,
53
+ pricing_mode: pricing_mode,
54
+ line_items: line_items
30
55
  )
31
56
  end
32
-
33
- def to_h
34
- super.compact
35
- end
36
57
  end
37
58
  end