llm_cost_tracker 0.9.0 → 0.10.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
Files changed (104)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +29 -1
  3. data/README.md +2 -1
  4. data/app/controllers/llm_cost_tracker/application_controller.rb +1 -1
  5. data/app/controllers/llm_cost_tracker/calls_controller.rb +16 -4
  6. data/app/helpers/llm_cost_tracker/application_helper.rb +1 -1
  7. data/app/models/llm_cost_tracker/provider_invoice_import.rb +9 -4
  8. data/app/services/llm_cost_tracker/dashboard/setup_state.rb +110 -0
  9. data/app/views/llm_cost_tracker/calls/show.html.erb +1 -1
  10. data/app/views/llm_cost_tracker/data_quality/index.html.erb +1 -1
  11. data/lib/llm_cost_tracker/billing/cost_status.rb +21 -25
  12. data/lib/llm_cost_tracker/billing/line_item.rb +15 -49
  13. data/lib/llm_cost_tracker/budget.rb +28 -6
  14. data/lib/llm_cost_tracker/capture/stream_collector.rb +35 -29
  15. data/lib/llm_cost_tracker/capture/stream_tracker.rb +1 -1
  16. data/lib/llm_cost_tracker/configuration.rb +31 -28
  17. data/lib/llm_cost_tracker/doctor/capture_verifier.rb +1 -1
  18. data/lib/llm_cost_tracker/doctor/ingestion_check.rb +8 -8
  19. data/lib/llm_cost_tracker/doctor/legacy_audit_check.rb +0 -2
  20. data/lib/llm_cost_tracker/doctor/legacy_billing_status_check.rb +0 -2
  21. data/lib/llm_cost_tracker/doctor.rb +6 -17
  22. data/lib/llm_cost_tracker/engine.rb +1 -2
  23. data/lib/llm_cost_tracker/errors.rb +3 -2
  24. data/lib/llm_cost_tracker/event.rb +47 -0
  25. data/lib/llm_cost_tracker/generators/llm_cost_tracker/{durable_ingestion_generator.rb → async_ingestion_generator.rb} +8 -8
  26. data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +4 -23
  27. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/{create_llm_cost_tracker_durable_ingestion.rb.erb → create_llm_cost_tracker_async_ingestion.rb.erb} +3 -3
  28. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_reconciliation.rb.erb +6 -1
  29. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +14 -7
  30. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_rollups_provider.rb.erb +23 -8
  31. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_tags_key_value_index.rb.erb +5 -5
  32. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_provider_invoice_imports_provider.rb.erb +32 -0
  33. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_provider_invoices_metadata_index.rb.erb +25 -0
  34. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_rollups_provider_generator.rb +0 -9
  35. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_provider_invoice_imports_provider_generator.rb +31 -0
  36. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_provider_invoices_metadata_index_generator.rb +31 -0
  37. data/lib/llm_cost_tracker/ingestion/batch.rb +5 -2
  38. data/lib/llm_cost_tracker/ingestion/inbox.rb +3 -24
  39. data/lib/llm_cost_tracker/ingestion/pool.rb +44 -0
  40. data/lib/llm_cost_tracker/ingestion/worker.rb +22 -36
  41. data/lib/llm_cost_tracker/ingestion.rb +8 -9
  42. data/lib/llm_cost_tracker/integrations/anthropic.rb +28 -42
  43. data/lib/llm_cost_tracker/integrations/base.rb +14 -11
  44. data/lib/llm_cost_tracker/integrations/openai.rb +93 -66
  45. data/lib/llm_cost_tracker/integrations/ruby_llm.rb +18 -20
  46. data/lib/llm_cost_tracker/integrations.rb +14 -13
  47. data/lib/llm_cost_tracker/ledger/period/totals.rb +5 -3
  48. data/lib/llm_cost_tracker/ledger/rollups.rb +4 -13
  49. data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +11 -0
  50. data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +13 -3
  51. data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +11 -0
  52. data/lib/llm_cost_tracker/ledger/schema/calls.rb +0 -4
  53. data/lib/llm_cost_tracker/ledger/schema/ingestion_inbox_entries.rb +13 -3
  54. data/lib/llm_cost_tracker/ledger/schema/ingestion_leases.rb +13 -3
  55. data/lib/llm_cost_tracker/ledger/schema/provider_invoice_imports.rb +19 -9
  56. data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +26 -11
  57. data/lib/llm_cost_tracker/ledger/store.rb +21 -18
  58. data/lib/llm_cost_tracker/ledger/tags/query.rb +0 -1
  59. data/lib/llm_cost_tracker/logging.rb +0 -4
  60. data/lib/llm_cost_tracker/middleware/faraday.rb +44 -16
  61. data/lib/llm_cost_tracker/parsers/anthropic.rb +21 -28
  62. data/lib/llm_cost_tracker/parsers/azure.rb +46 -0
  63. data/lib/llm_cost_tracker/parsers/base.rb +53 -47
  64. data/lib/llm_cost_tracker/parsers/gemini.rb +20 -22
  65. data/lib/llm_cost_tracker/parsers/openai.rb +8 -40
  66. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +26 -43
  67. data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +45 -16
  68. data/lib/llm_cost_tracker/parsers/openai_usage.rb +16 -20
  69. data/lib/llm_cost_tracker/parsers.rb +31 -4
  70. data/lib/llm_cost_tracker/prices.json +567 -579
  71. data/lib/llm_cost_tracker/pricing/backfill.rb +140 -0
  72. data/lib/llm_cost_tracker/pricing/effective_prices.rb +2 -4
  73. data/lib/llm_cost_tracker/pricing/estimator.rb +33 -0
  74. data/lib/llm_cost_tracker/pricing/explainer.rb +4 -1
  75. data/lib/llm_cost_tracker/pricing/lookup.rb +37 -2
  76. data/lib/llm_cost_tracker/pricing/registry.rb +0 -7
  77. data/lib/llm_cost_tracker/pricing/service_charges.rb +5 -9
  78. data/lib/llm_cost_tracker/pricing/{sync_change_printer.rb → sync/change_printer.rb} +3 -3
  79. data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +14 -2
  80. data/lib/llm_cost_tracker/pricing/sync.rb +1 -9
  81. data/lib/llm_cost_tracker/pricing/unknown.rb +5 -2
  82. data/lib/llm_cost_tracker/pricing.rb +72 -27
  83. data/lib/llm_cost_tracker/providers/anthropic/tier_classification.rb +22 -0
  84. data/lib/llm_cost_tracker/providers/azure/hosts.rb +17 -0
  85. data/lib/llm_cost_tracker/providers/gemini/model_families.rb +17 -0
  86. data/lib/llm_cost_tracker/providers/openai/hosts.rb +35 -0
  87. data/lib/llm_cost_tracker/providers/openai/model_families.rb +51 -0
  88. data/lib/llm_cost_tracker/railtie.rb +3 -1
  89. data/lib/llm_cost_tracker/reconciliation/diff.rb +26 -45
  90. data/lib/llm_cost_tracker/reconciliation/diff_result.rb +0 -4
  91. data/lib/llm_cost_tracker/reconciliation/importer.rb +1 -0
  92. data/lib/llm_cost_tracker/reconciliation/sources/anthropic_usage.rb +4 -3
  93. data/lib/llm_cost_tracker/report.rb +0 -4
  94. data/lib/llm_cost_tracker/retention.rb +20 -8
  95. data/lib/llm_cost_tracker/tags/sanitizer.rb +13 -17
  96. data/lib/llm_cost_tracker/token_usage.rb +4 -0
  97. data/lib/llm_cost_tracker/tracker.rb +33 -74
  98. data/lib/llm_cost_tracker/version.rb +1 -1
  99. data/lib/llm_cost_tracker.rb +11 -15
  100. data/lib/tasks/llm_cost_tracker.rake +16 -2
  101. metadata +18 -7
  102. data/lib/llm_cost_tracker/dashboard_setup_state.rb +0 -109
  103. data/lib/llm_cost_tracker/ingestion/inline.rb +0 -22
  104. data/lib/llm_cost_tracker/usage_capture.rb +0 -58
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "base"
4
+ require_relative "../providers/anthropic/tier_classification"
4
5
 
5
6
  module LlmCostTracker
6
7
  module Integrations
@@ -111,7 +112,7 @@ module LlmCostTracker
111
112
 
112
113
  record_safely do
113
114
  LlmCostTracker::Tracker.record(
114
- capture: UsageCapture.build(
115
+ event: Event.build(
115
116
  provider: provider,
116
117
  model: model,
117
118
  token_usage: TokenUsage.build(
@@ -140,7 +141,7 @@ module LlmCostTracker
140
141
  hidden_output = object_value(response, :thinking_tokens, :reasoning_tokens).to_i
141
142
 
142
143
  LlmCostTracker::Tracker.record(
143
- capture: UsageCapture.build(
144
+ event: Event.build(
144
145
  provider: provider,
145
146
  model: model,
146
147
  pricing_mode: pricing_mode(provider: provider, response: response),
@@ -182,17 +183,14 @@ module LlmCostTracker
182
183
  end
183
184
 
184
185
  def provider_response_id(response)
185
- object_value(response, :id, :provider_response_id) || object_dig(response, :raw, :id)
186
+ object_value(response, :id, :provider_response_id)
186
187
  end
187
188
 
188
- ANTHROPIC_STANDARD_EQUIVALENT_SERVICE_TIERS = %w[standard standard_only priority].freeze
189
- private_constant :ANTHROPIC_STANDARD_EQUIVALENT_SERVICE_TIERS
190
-
191
189
  def pricing_mode(provider:, response:)
192
- raw = object_value(response, :pricing_mode, :service_tier) ||
193
- object_dig(response, :raw, :pricing_mode) ||
194
- object_dig(response, :raw, :service_tier)
195
- return nil if provider == "anthropic" && ANTHROPIC_STANDARD_EQUIVALENT_SERVICE_TIERS.include?(raw.to_s)
190
+ raw = object_value(response, :pricing_mode, :service_tier)
191
+ if provider == "anthropic" && LlmCostTracker::Providers::Anthropic::TierClassification.standard_equivalent_tier?(raw)
192
+ return nil
193
+ end
196
194
 
197
195
  raw
198
196
  end
@@ -202,14 +200,14 @@ module LlmCostTracker
202
200
  def complete(*args, **kwargs, &)
203
201
  integration = LlmCostTracker::Integrations::RubyLlm
204
202
  request = integration.request_params(args, kwargs)
205
- integration.enforce_budget!
203
+ integration.enforce_budget!(request: request)
206
204
  started_at = LlmCostTracker::Timing.now_monotonic
207
205
  response = super
208
206
  integration.record_completion(
209
207
  self,
210
208
  response,
211
209
  request: request,
212
- latency_ms: integration.elapsed_ms(started_at),
210
+ latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
213
211
  stream: integration.streaming_request?(request, has_block: block_given?)
214
212
  )
215
213
  response
@@ -218,14 +216,14 @@ module LlmCostTracker
218
216
  def embed(*args, **kwargs)
219
217
  integration = LlmCostTracker::Integrations::RubyLlm
220
218
  request = integration.request_params(args, kwargs)
221
- integration.enforce_budget!
219
+ integration.enforce_budget!(request: request)
222
220
  started_at = LlmCostTracker::Timing.now_monotonic
223
221
  response = super
224
222
  integration.record_embedding(
225
223
  self,
226
224
  response,
227
225
  request: request,
228
- latency_ms: integration.elapsed_ms(started_at)
226
+ latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at)
229
227
  )
230
228
  response
231
229
  end
@@ -233,14 +231,14 @@ module LlmCostTracker
233
231
  def transcribe(*args, **kwargs)
234
232
  integration = LlmCostTracker::Integrations::RubyLlm
235
233
  request = integration.request_params(args, kwargs)
236
- integration.enforce_budget!
234
+ integration.enforce_budget!(request: request)
237
235
  started_at = LlmCostTracker::Timing.now_monotonic
238
236
  response = super
239
237
  integration.record_transcription(
240
238
  self,
241
239
  response,
242
240
  request: request,
243
- latency_ms: integration.elapsed_ms(started_at)
241
+ latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at)
244
242
  )
245
243
  response
246
244
  end
@@ -248,14 +246,14 @@ module LlmCostTracker
248
246
  def paint(*args, **kwargs)
249
247
  integration = LlmCostTracker::Integrations::RubyLlm
250
248
  request = integration.request_params(args, kwargs)
251
- integration.enforce_budget!
249
+ integration.enforce_budget!(request: request)
252
250
  started_at = LlmCostTracker::Timing.now_monotonic
253
251
  response = super
254
252
  integration.record_image(
255
253
  self,
256
254
  response,
257
255
  request: request,
258
- latency_ms: integration.elapsed_ms(started_at)
256
+ latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at)
259
257
  )
260
258
  response
261
259
  end
@@ -263,14 +261,14 @@ module LlmCostTracker
263
261
  def moderate(*args, **kwargs)
264
262
  integration = LlmCostTracker::Integrations::RubyLlm
265
263
  request = integration.request_params(args, kwargs)
266
- integration.enforce_budget!
264
+ integration.enforce_budget!(request: request)
267
265
  started_at = LlmCostTracker::Timing.now_monotonic
268
266
  response = super
269
267
  integration.record_moderation(
270
268
  self,
271
269
  response,
272
270
  request: request,
273
- latency_ms: integration.elapsed_ms(started_at)
271
+ latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at)
274
272
  )
275
273
  response
276
274
  end
@@ -2,19 +2,17 @@
2
2
 
3
3
  require_relative "errors"
4
4
  require_relative "logging"
5
- require_relative "integrations/openai"
6
- require_relative "integrations/anthropic"
7
- require_relative "integrations/ruby_llm"
8
5
 
9
6
  module LlmCostTracker
10
7
  module Integrations
11
- AVAILABLE = {
12
- openai: Openai,
13
- anthropic: Anthropic,
14
- ruby_llm: RubyLlm
15
- }.freeze
8
+ autoload :Base, "llm_cost_tracker/integrations/base"
9
+ autoload :Openai, "llm_cost_tracker/integrations/openai"
10
+ autoload :Anthropic, "llm_cost_tracker/integrations/anthropic"
11
+ autoload :RubyLlm, "llm_cost_tracker/integrations/ruby_llm"
16
12
 
13
+ INTEGRATION_CONSTANTS = { openai: :Openai, anthropic: :Anthropic, ruby_llm: :RubyLlm }.freeze
17
14
  DOUBLE_INSTRUMENTATION_OVERLAPS = %i[openai anthropic].freeze
15
+ private_constant :DOUBLE_INSTRUMENTATION_OVERLAPS
18
16
 
19
17
  module_function
20
18
 
@@ -25,7 +23,7 @@ module LlmCostTracker
25
23
  end
26
24
 
27
25
  def checks(names = LlmCostTracker.configuration.instrumented_integrations)
28
- return [Base::Result.new(:integrations, :ok, "no SDK integrations enabled")] if names.empty?
26
+ return [Base::Result.new(:ok, "integrations", "no SDK integrations enabled")] if names.empty?
29
27
 
30
28
  normalize(names).map { |name| fetch(name).status }
31
29
  end
@@ -48,14 +46,17 @@ module LlmCostTracker
48
46
  end
49
47
 
50
48
  def fetch(name)
51
- AVAILABLE.fetch(name) do
52
- message = "Unknown integration: #{name.inspect}. Use one of: #{names.join(', ')}"
53
- raise LlmCostTracker::Error, message
49
+ const_name = INTEGRATION_CONSTANTS[name.to_sym]
50
+ unless const_name
51
+ raise LlmCostTracker::Error,
52
+ "Unknown integration: #{name.inspect}. Use one of: #{names.join(', ')}"
54
53
  end
54
+
55
+ const_get(const_name)
55
56
  end
56
57
 
57
58
  def names
58
- AVAILABLE.keys
59
+ INTEGRATION_CONSTANTS.keys
59
60
  end
60
61
  end
61
62
  end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bigdecimal"
4
+
3
5
  require_relative "../period"
4
6
 
5
7
  module LlmCostTracker
@@ -26,12 +28,12 @@ module LlmCostTracker
26
28
  attr_reader :periods, :time
27
29
 
28
30
  def snapshot_totals
29
- values = periods.to_h { |period| [period, 0.0] }
31
+ values = periods.to_h { |period| [period, BigDecimal("0")] }
30
32
  period_by_name = periods.to_h { |period| [period.name, period] }
31
33
  sql = periods.map { |period| snapshot_select(period) }.join(" UNION ALL ")
32
34
  LlmCostTracker::Call.find_by_sql(sql).each do |row|
33
35
  period = period_by_name.fetch(row.period_key)
34
- values[period] = row.total_cost.to_f
36
+ values[period] = BigDecimal(row.total_cost.to_s)
35
37
  end
36
38
  values
37
39
  end
@@ -39,7 +41,7 @@ module LlmCostTracker
39
41
  def snapshot_select(period)
40
42
  start = Period.range_start(period, time)
41
43
  components = [period_total_sql(period, start)]
42
- components << pending_total_sql(start) if Ingestion.durable?
44
+ components << pending_total_sql(start) if Ingestion.async?
43
45
  "SELECT #{connection.quote(period.name)} AS period_key, " \
44
46
  "(#{components.join(') + (')}) AS total_cost"
45
47
  end
@@ -34,8 +34,8 @@ module LlmCostTracker
34
34
  private
35
35
 
36
36
  def period_rows(event)
37
- currency = currency_for(event)
38
- provider = provider_for(event)
37
+ currency = currency_from_snapshot(event.pricing_snapshot)
38
+ provider = event.provider.to_s
39
39
  Period::PERIODS.map do |period, name|
40
40
  {
41
41
  period: name,
@@ -61,8 +61,8 @@ module LlmCostTracker
61
61
 
62
62
  def call_rollups(events)
63
63
  events.each_with_object(Hash.new { |totals, key| totals[key] = BigDecimal("0") }) do |event, totals|
64
- currency = currency_for(event)
65
- provider = provider_for(event)
64
+ currency = currency_from_snapshot(event.pricing_snapshot)
65
+ provider = event.provider.to_s
66
66
  Period::PERIODS.each do |period, name|
67
67
  key = [name, Period.bucket(period, event.tracked_at), currency, provider]
68
68
  totals[key] += BigDecimal(event.total_cost.to_s)
@@ -118,20 +118,11 @@ module LlmCostTracker
118
118
  end
119
119
  end
120
120
 
121
- def currency_for(event)
122
- snapshot = event.respond_to?(:pricing_snapshot) ? event.pricing_snapshot : nil
123
- currency_from_snapshot(snapshot)
124
- end
125
-
126
121
  def currency_from_snapshot(snapshot)
127
122
  value = (snapshot.is_a?(Hash) && (snapshot["currency"] || snapshot[:currency])) || DEFAULT_CURRENCY
128
123
  value.to_s.upcase
129
124
  end
130
125
 
131
- def provider_for(event)
132
- (event.respond_to?(:provider) ? event.provider : nil).to_s
133
- end
134
-
135
126
  def upsert_call_rollups(rows)
136
127
  LlmCostTracker::CallRollup.upsert_all(
137
128
  rows,
@@ -42,6 +42,17 @@ module LlmCostTracker
42
42
  return ["#{table_name} table is missing"] unless connection.data_source_exists?(table_name)
43
43
 
44
44
  columns = LlmCostTracker::CallLineItem.columns_hash
45
+ cache = @schema_capabilities
46
+ return cache.fetch(:errors) if cache && cache.fetch(:columns).equal?(columns)
47
+
48
+ errors = compute_errors(connection, table_name, columns)
49
+ @schema_capabilities = { columns: columns, errors: errors }
50
+ errors
51
+ end
52
+
53
+ private
54
+
55
+ def compute_errors(connection, table_name, columns)
45
56
  errors = []
46
57
  missing = REQUIRED_COLUMNS - columns.keys
47
58
  errors << "missing columns: #{missing.join(', ')}" if missing.any?
@@ -16,8 +16,20 @@ module LlmCostTracker
16
16
  table_name = LlmCostTracker::CallRollup.table_name
17
17
  return ["#{table_name} table is missing"] unless connection.data_source_exists?(table_name)
18
18
 
19
+ columns = LlmCostTracker::CallRollup.columns_hash
20
+ cache = @schema_capabilities
21
+ return cache.fetch(:errors) if cache && cache.fetch(:columns).equal?(columns)
22
+
23
+ errors = compute_errors(connection, table_name, columns)
24
+ @schema_capabilities = { columns: columns, errors: errors }
25
+ errors
26
+ end
27
+
28
+ private
29
+
30
+ def compute_errors(connection, table_name, columns)
19
31
  errors = []
20
- missing = REQUIRED_COLUMNS - LlmCostTracker::CallRollup.columns_hash.keys
32
+ missing = REQUIRED_COLUMNS - columns.keys
21
33
  errors << "missing columns: #{missing.join(', ')}" if missing.any?
22
34
  unless unique_period_index?(connection, table_name)
23
35
  errors << "missing unique index: period, period_start, currency, provider"
@@ -25,8 +37,6 @@ module LlmCostTracker
25
37
  errors
26
38
  end
27
39
 
28
- private
29
-
30
40
  def unique_period_index?(connection, table_name)
31
41
  connection.index_exists?(table_name, UNIQUE_COLUMNS, unique: true)
32
42
  end
@@ -19,6 +19,17 @@ module LlmCostTracker
19
19
  return ["#{table_name} table is missing"] unless connection.data_source_exists?(table_name)
20
20
 
21
21
  columns = LlmCostTracker::CallTag.columns_hash
22
+ cache = @schema_capabilities
23
+ return cache.fetch(:errors) if cache && cache.fetch(:columns).equal?(columns)
24
+
25
+ errors = compute_errors(connection, table_name, columns)
26
+ @schema_capabilities = { columns: columns, errors: errors }
27
+ errors
28
+ end
29
+
30
+ private
31
+
32
+ def compute_errors(connection, table_name, columns)
22
33
  errors = []
23
34
  missing = REQUIRED_COLUMNS - columns.keys
24
35
  errors << "missing columns: #{missing.join(', ')}" if missing.any?
@@ -47,10 +47,6 @@ module LlmCostTracker
47
47
  private_constant :REQUIRED_INDEXES
48
48
 
49
49
  class << self
50
- def current_schema?
51
- current_schema_errors.empty?
52
- end
53
-
54
50
  def current_schema_errors
55
51
  schema_capabilities.fetch(:current_schema_errors)
56
52
  end
@@ -28,15 +28,25 @@ module LlmCostTracker
28
28
  table_name = LlmCostTracker::Ingestion::InboxEntry.table_name
29
29
  return ["#{table_name} table is missing"] unless connection.data_source_exists?(table_name)
30
30
 
31
+ columns = LlmCostTracker::Ingestion::InboxEntry.columns_hash
32
+ cache = @schema_capabilities
33
+ return cache.fetch(:errors) if cache && cache.fetch(:columns).equal?(columns)
34
+
35
+ errors = compute_errors(connection, table_name, columns)
36
+ @schema_capabilities = { columns: columns, errors: errors }
37
+ errors
38
+ end
39
+
40
+ private
41
+
42
+ def compute_errors(connection, table_name, columns)
31
43
  errors = []
32
- missing = REQUIRED_COLUMNS - LlmCostTracker::Ingestion::InboxEntry.columns_hash.keys
44
+ missing = REQUIRED_COLUMNS - columns.keys
33
45
  errors << "missing columns: #{missing.join(', ')}" if missing.any?
34
46
  errors << "missing unique index: event_id" unless event_id_unique_index?(connection, table_name)
35
47
  errors
36
48
  end
37
49
 
38
- private
39
-
40
50
  def event_id_unique_index?(connection, table_name)
41
51
  connection.index_exists?(table_name, UNIQUE_COLUMNS, unique: true)
42
52
  end
@@ -23,15 +23,25 @@ module LlmCostTracker
23
23
  table_name = LlmCostTracker::Ingestion::Lease.table_name
24
24
  return ["#{table_name} table is missing"] unless connection.data_source_exists?(table_name)
25
25
 
26
+ columns = LlmCostTracker::Ingestion::Lease.columns_hash
27
+ cache = @schema_capabilities
28
+ return cache.fetch(:errors) if cache && cache.fetch(:columns).equal?(columns)
29
+
30
+ errors = compute_errors(connection, table_name, columns)
31
+ @schema_capabilities = { columns: columns, errors: errors }
32
+ errors
33
+ end
34
+
35
+ private
36
+
37
+ def compute_errors(connection, table_name, columns)
26
38
  errors = []
27
- missing = REQUIRED_COLUMNS - LlmCostTracker::Ingestion::Lease.columns_hash.keys
39
+ missing = REQUIRED_COLUMNS - columns.keys
28
40
  errors << "missing columns: #{missing.join(', ')}" if missing.any?
29
41
  errors << "missing unique index: name" unless name_unique_index?(connection, table_name)
30
42
  errors
31
43
  end
32
44
 
33
- private
34
-
35
45
  def name_unique_index?(connection, table_name)
36
46
  connection.index_exists?(table_name, UNIQUE_COLUMNS, unique: true)
37
47
  end
@@ -7,10 +7,10 @@ module LlmCostTracker
7
7
  module Schema
8
8
  module ProviderInvoiceImports
9
9
  REQUIRED_COLUMNS = %w[
10
- source cursor window_start window_end state last_error
10
+ source provider cursor window_start window_end state last_error
11
11
  rows_imported started_at finished_at
12
12
  ].freeze
13
- SOURCE_STARTED_AT_INDEX = %i[source started_at].freeze
13
+ SOURCE_PROVIDER_STARTED_AT_INDEX = %i[source provider started_at].freeze
14
14
 
15
15
  class << self
16
16
  def current_schema_errors
@@ -19,25 +19,35 @@ module LlmCostTracker
19
19
  table_name = LlmCostTracker::ProviderInvoiceImport.table_name
20
20
  return ["#{table_name} table is missing"] unless connection.data_source_exists?(table_name)
21
21
 
22
- errors = []
23
- errors.concat(column_errors)
24
- errors.concat(index_errors(connection, table_name))
22
+ columns = LlmCostTracker::ProviderInvoiceImport.columns_hash
23
+ cache = @schema_capabilities
24
+ return cache.fetch(:errors) if cache && cache.fetch(:columns).equal?(columns)
25
+
26
+ errors = compute_errors(connection, table_name, columns)
27
+ @schema_capabilities = { columns: columns, errors: errors }
25
28
  errors
26
29
  end
27
30
 
28
31
  private
29
32
 
30
- def column_errors
31
- missing = REQUIRED_COLUMNS - LlmCostTracker::ProviderInvoiceImport.columns_hash.keys
33
+ def compute_errors(connection, table_name, columns)
34
+ errors = []
35
+ errors.concat(column_errors(columns))
36
+ errors.concat(index_errors(connection, table_name))
37
+ errors
38
+ end
39
+
40
+ def column_errors(columns)
41
+ missing = REQUIRED_COLUMNS - columns.keys
32
42
  return [] if missing.empty?
33
43
 
34
44
  ["missing columns: #{missing.join(', ')}"]
35
45
  end
36
46
 
37
47
  def index_errors(connection, table_name)
38
- return [] if connection.index_exists?(table_name, SOURCE_STARTED_AT_INDEX)
48
+ return [] if connection.index_exists?(table_name, SOURCE_PROVIDER_STARTED_AT_INDEX)
39
49
 
40
- ["missing index: source, started_at"]
50
+ ["missing index: source, provider, started_at"]
41
51
  end
42
52
  end
43
53
  end
@@ -19,27 +19,32 @@ module LlmCostTracker
19
19
  table_name = LlmCostTracker::ProviderInvoice.table_name
20
20
  return ["#{table_name} table is missing"] unless connection.data_source_exists?(table_name)
21
21
 
22
- errors = []
23
- errors.concat(column_errors)
24
- errors.concat(metadata_type_errors(connection))
25
- errors.concat(index_errors(connection, table_name))
22
+ columns = LlmCostTracker::ProviderInvoice.columns_hash
23
+ cache = @schema_capabilities
24
+ return cache.fetch(:errors) if cache && cache.fetch(:columns).equal?(columns)
25
+
26
+ errors = compute_errors(connection, table_name, columns)
27
+ @schema_capabilities = { columns: columns, errors: errors }
26
28
  errors
27
29
  end
28
30
 
29
31
  private
30
32
 
31
- def column_errors
32
- missing = REQUIRED_COLUMNS - LlmCostTracker::ProviderInvoice.columns_hash.keys
33
+ def compute_errors(connection, table_name, columns)
34
+ errors = []
35
+ errors.concat(column_errors(columns))
36
+ errors.concat(Adapter.json_column_errors(columns["metadata"], connection, "metadata"))
37
+ errors.concat(index_errors(connection, table_name))
38
+ errors
39
+ end
40
+
41
+ def column_errors(columns)
42
+ missing = REQUIRED_COLUMNS - columns.keys
33
43
  return [] if missing.empty?
34
44
 
35
45
  ["missing columns: #{missing.join(', ')}"]
36
46
  end
37
47
 
38
- def metadata_type_errors(connection)
39
- metadata = LlmCostTracker::ProviderInvoice.columns_hash["metadata"]
40
- Adapter.json_column_errors(metadata, connection, "metadata")
41
- end
42
-
43
48
  def index_errors(connection, table_name)
44
49
  errors = []
45
50
  unless connection.index_exists?(table_name, UNIQUE_INDEX_COLUMNS, unique: true)
@@ -48,8 +53,18 @@ module LlmCostTracker
48
53
  unless connection.index_exists?(table_name, SOURCE_PERIOD_INDEX_COLUMNS)
49
54
  errors << "missing index: source, currency, period_start"
50
55
  end
56
+ if Adapter.postgresql?(connection) && !gin_metadata_index?(connection, table_name)
57
+ errors << "missing GIN index on metadata " \
58
+ "(run bin/rails generate llm_cost_tracker:upgrade_provider_invoices_metadata_index)"
59
+ end
51
60
  errors
52
61
  end
62
+
63
+ def gin_metadata_index?(connection, table_name)
64
+ connection.indexes(table_name).any? do |index|
65
+ index.columns == ["metadata"] && index.using.to_s == "gin"
66
+ end
67
+ end
53
68
  end
54
69
  end
55
70
  end
@@ -11,27 +11,34 @@ module LlmCostTracker
11
11
  module Ledger
12
12
  class Store
13
13
  class << self
14
- def insert_many(events)
14
+ def insert(events, skip_existence_check: false)
15
15
  events = Array(events)
16
- return [] if events.empty?
16
+ return if events.empty?
17
17
 
18
- insertable = insertable_events(events)
18
+ insertable = skip_existence_check ? events : insertable_events(events)
19
+ return unless insertable.any?
19
20
 
20
- if insertable.any?
21
- LlmCostTracker::Call.transaction do
22
- rows = insertable.map { |event| attributes_for(event) }
23
- LlmCostTracker::Call.insert_all!(rows, record_timestamps: true, returning: false)
24
- call_ids = call_ids_for(insertable)
25
- insert_line_items(insertable, call_ids)
26
- insert_call_tags(insertable, call_ids)
27
- end
28
- increment_rollups_safely(insertable) if LlmCostTracker.configuration.cache_rollups
21
+ LlmCostTracker::Call.transaction do
22
+ rows = insertable.map { |event| attributes_for(event) }
23
+ call_ids = insert_calls_returning_ids(rows, insertable)
24
+ insert_line_items(insertable, call_ids)
25
+ insert_call_tags(insertable, call_ids)
29
26
  end
30
- events
27
+ increment_rollups_safely(insertable) if LlmCostTracker.configuration.cache_rollups
31
28
  end
32
29
 
33
30
  private
34
31
 
32
+ def insert_calls_returning_ids(rows, insertable)
33
+ if LlmCostTracker::Call.connection.supports_insert_returning?
34
+ result = LlmCostTracker::Call.insert_all!(rows, record_timestamps: true, returning: %i[id event_id])
35
+ result.rows.to_h { |id, event_id| [event_id, id] }
36
+ else
37
+ LlmCostTracker::Call.insert_all!(rows, record_timestamps: true, returning: false)
38
+ call_ids_for(insertable)
39
+ end
40
+ end
41
+
35
42
  def attributes_for(event)
36
43
  attributes = {
37
44
  event_id: event.event_id,
@@ -110,7 +117,7 @@ module LlmCostTracker
110
117
  {
111
118
  llm_cost_tracker_call_id: call_ids.fetch(event.event_id),
112
119
  key: key.to_s,
113
- value: tag_row_value(value)
120
+ value: Tags::Encoding.encode(value)
114
121
  }
115
122
  end
116
123
  end
@@ -119,10 +126,6 @@ module LlmCostTracker
119
126
  LlmCostTracker::CallTag.insert_all!(rows, record_timestamps: false, returning: false)
120
127
  end
121
128
 
122
- def tag_row_value(value)
123
- Tags::Encoding.encode(value)
124
- end
125
-
126
129
  def stored_details(details)
127
130
  (details || {}).transform_keys(&:to_s).transform_values { |value| Tags::Encoding.normalize_value(value) }
128
131
  end
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../schema/adapter"
4
3
  require_relative "encoding"
5
4
 
6
5
  module LlmCostTracker
@@ -9,10 +9,6 @@ module LlmCostTracker
9
9
  log(:debug, message)
10
10
  end
11
11
 
12
- def info(message)
13
- log(:info, message)
14
- end
15
-
16
12
  def warn(message)
17
13
  log(:warn, message)
18
14
  end