llm_cost_tracker 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +108 -0
  3. data/README.md +12 -5
  4. data/app/assets/llm_cost_tracker/application.css +65 -5
  5. data/app/controllers/llm_cost_tracker/application_controller.rb +25 -33
  6. data/app/controllers/llm_cost_tracker/assets_controller.rb +1 -1
  7. data/app/controllers/llm_cost_tracker/calls_controller.rb +5 -7
  8. data/app/controllers/llm_cost_tracker/data_quality_controller.rb +4 -0
  9. data/app/controllers/llm_cost_tracker/reconciliation_controller.rb +106 -0
  10. data/app/controllers/llm_cost_tracker/tags_controller.rb +15 -1
  11. data/app/helpers/llm_cost_tracker/application_helper.rb +10 -0
  12. data/app/helpers/llm_cost_tracker/inline_style_helper.rb +28 -0
  13. data/app/helpers/llm_cost_tracker/reconciliation_helper.rb +13 -0
  14. data/app/helpers/llm_cost_tracker/token_usage_helper.rb +5 -1
  15. data/app/models/llm_cost_tracker/call.rb +0 -3
  16. data/app/models/llm_cost_tracker/call_line_item.rb +1 -5
  17. data/app/models/llm_cost_tracker/call_rollup.rb +0 -3
  18. data/app/models/llm_cost_tracker/call_tag.rb +0 -4
  19. data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +0 -4
  20. data/app/models/llm_cost_tracker/ingestion/lease.rb +0 -3
  21. data/app/models/llm_cost_tracker/provider_invoice.rb +7 -3
  22. data/app/models/llm_cost_tracker/provider_invoice_import.rb +24 -0
  23. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +33 -4
  24. data/app/services/llm_cost_tracker/dashboard/filter.rb +6 -4
  25. data/app/views/layouts/llm_cost_tracker/application.html.erb +6 -1
  26. data/app/views/llm_cost_tracker/calls/show.html.erb +25 -40
  27. data/app/views/llm_cost_tracker/dashboard/index.html.erb +9 -9
  28. data/app/views/llm_cost_tracker/data_quality/index.html.erb +91 -52
  29. data/app/views/llm_cost_tracker/reconciliation/index.html.erb +183 -0
  30. data/app/views/llm_cost_tracker/shared/_bar.html.erb +1 -1
  31. data/app/views/llm_cost_tracker/shared/_filters.html.erb +3 -0
  32. data/app/views/llm_cost_tracker/shared/_metric_stack.html.erb +1 -1
  33. data/app/views/llm_cost_tracker/tags/show.html.erb +60 -0
  34. data/config/routes.rb +3 -2
  35. data/lib/llm_cost_tracker/billing/components.rb +45 -3
  36. data/lib/llm_cost_tracker/billing/components.yml +71 -0
  37. data/lib/llm_cost_tracker/billing/line_item.rb +1 -1
  38. data/lib/llm_cost_tracker/budget.rb +4 -2
  39. data/lib/llm_cost_tracker/capture/stream_collector.rb +93 -20
  40. data/lib/llm_cost_tracker/capture/stream_tracker.rb +40 -5
  41. data/lib/llm_cost_tracker/configuration.rb +53 -1
  42. data/lib/llm_cost_tracker/dashboard_setup_state.rb +109 -0
  43. data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +2 -0
  44. data/lib/llm_cost_tracker/doctor/ingestion_check.rb +26 -0
  45. data/lib/llm_cost_tracker/doctor/invoice_reconciliation_check.rb +164 -0
  46. data/lib/llm_cost_tracker/doctor/schema_check.rb +5 -2
  47. data/lib/llm_cost_tracker/doctor.rb +72 -3
  48. data/lib/llm_cost_tracker/engine.rb +9 -0
  49. data/lib/llm_cost_tracker/event.rb +1 -1
  50. data/lib/llm_cost_tracker/generators/llm_cost_tracker/call_rollups_generator.rb +43 -0
  51. data/lib/llm_cost_tracker/generators/llm_cost_tracker/durable_ingestion_generator.rb +43 -0
  52. data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +13 -3
  53. data/lib/llm_cost_tracker/generators/llm_cost_tracker/reconciliation_generator.rb +34 -0
  54. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_call_rollups.rb.erb +15 -0
  55. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +5 -58
  56. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_durable_ingestion.rb.erb +29 -0
  57. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_reconciliation.rb.erb +55 -0
  58. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +28 -25
  59. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_rollups_provider.rb.erb +20 -0
  60. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_tags_key_value_index.rb.erb +32 -0
  61. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_image_tokens.rb.erb +18 -0
  62. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_rollups_provider_generator.rb +38 -0
  63. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_tags_key_value_index_generator.rb +30 -0
  64. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_image_tokens_generator.rb +29 -0
  65. data/lib/llm_cost_tracker/ingestion/inbox.rb +0 -1
  66. data/lib/llm_cost_tracker/ingestion/inline.rb +22 -0
  67. data/lib/llm_cost_tracker/ingestion/worker.rb +10 -2
  68. data/lib/llm_cost_tracker/ingestion.rb +48 -10
  69. data/lib/llm_cost_tracker/integrations/anthropic.rb +24 -5
  70. data/lib/llm_cost_tracker/integrations/base.rb +22 -5
  71. data/lib/llm_cost_tracker/integrations/openai.rb +300 -66
  72. data/lib/llm_cost_tracker/integrations/ruby_llm.rb +105 -6
  73. data/lib/llm_cost_tracker/integrations.rb +19 -1
  74. data/lib/llm_cost_tracker/ledger/period/totals.rb +21 -5
  75. data/lib/llm_cost_tracker/ledger/rollups.rb +24 -10
  76. data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +30 -1
  77. data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +3 -3
  78. data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +17 -2
  79. data/lib/llm_cost_tracker/ledger/schema/calls.rb +2 -0
  80. data/lib/llm_cost_tracker/ledger/schema/ingestion_inbox_entries.rb +47 -0
  81. data/lib/llm_cost_tracker/ledger/schema/ingestion_leases.rb +42 -0
  82. data/lib/llm_cost_tracker/ledger/schema/provider_invoice_imports.rb +46 -0
  83. data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +2 -2
  84. data/lib/llm_cost_tracker/ledger/store.rb +14 -14
  85. data/lib/llm_cost_tracker/ledger/tags/encoding.rb +37 -0
  86. data/lib/llm_cost_tracker/ledger/tags/query.rb +2 -1
  87. data/lib/llm_cost_tracker/ledger.rb +2 -1
  88. data/lib/llm_cost_tracker/masking.rb +39 -0
  89. data/lib/llm_cost_tracker/middleware/faraday.rb +88 -29
  90. data/lib/llm_cost_tracker/parsers/anthropic.rb +22 -7
  91. data/lib/llm_cost_tracker/parsers/base.rb +5 -1
  92. data/lib/llm_cost_tracker/parsers/gemini.rb +4 -0
  93. data/lib/llm_cost_tracker/parsers/openai.rb +16 -2
  94. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +5 -1
  95. data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +49 -10
  96. data/lib/llm_cost_tracker/parsers/openai_usage.rb +124 -53
  97. data/lib/llm_cost_tracker/prices.json +110 -19
  98. data/lib/llm_cost_tracker/pricing/effective_prices.rb +5 -36
  99. data/lib/llm_cost_tracker/pricing/lookup.rb +36 -3
  100. data/lib/llm_cost_tracker/pricing/mode.rb +76 -0
  101. data/lib/llm_cost_tracker/pricing/registry.rb +3 -1
  102. data/lib/llm_cost_tracker/pricing/service_charges.rb +9 -3
  103. data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +50 -1
  104. data/lib/llm_cost_tracker/pricing/sync.rb +3 -1
  105. data/lib/llm_cost_tracker/pricing.rb +47 -19
  106. data/lib/llm_cost_tracker/railtie.rb +6 -0
  107. data/lib/llm_cost_tracker/reconcile_tasks.rb +134 -0
  108. data/lib/llm_cost_tracker/reconciliation/diff.rb +428 -0
  109. data/lib/llm_cost_tracker/reconciliation/diff_result.rb +48 -0
  110. data/lib/llm_cost_tracker/reconciliation/import_result.rb +19 -0
  111. data/lib/llm_cost_tracker/reconciliation/importer.rb +253 -0
  112. data/lib/llm_cost_tracker/reconciliation/sources/anthropic_usage.rb +171 -0
  113. data/lib/llm_cost_tracker/reconciliation/sources/fingerprint.rb +20 -0
  114. data/lib/llm_cost_tracker/reconciliation/sources/openai_usage.rb +142 -0
  115. data/lib/llm_cost_tracker/reconciliation.rb +118 -0
  116. data/lib/llm_cost_tracker/report/data.rb +4 -1
  117. data/lib/llm_cost_tracker/retention.rb +15 -2
  118. data/lib/llm_cost_tracker/tags/context.rb +3 -4
  119. data/lib/llm_cost_tracker/tags/sanitizer.rb +60 -4
  120. data/lib/llm_cost_tracker/token_usage.rb +10 -2
  121. data/lib/llm_cost_tracker/tracker.rb +45 -18
  122. data/lib/llm_cost_tracker/version.rb +1 -1
  123. data/lib/llm_cost_tracker.rb +9 -0
  124. data/lib/tasks/llm_cost_tracker.rake +25 -2
  125. metadata +36 -1
@@ -0,0 +1,253 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bigdecimal"
4
+ require "date"
5
+ require "json"
6
+
7
+ require_relative "import_result"
8
+ require_relative "../ledger/rollups"
9
+
10
+ module LlmCostTracker
11
+ module Reconciliation
12
+ class Importer
13
+ REQUIRED_FIELDS = %i[external_id period_start period_end].freeze
14
+ FORGIVING_METADATA_SOURCES = %i[csv].to_set.freeze
15
+ ENVELOPE_KEYS = %w[row_type meter authority match_basis].freeze
16
+
17
# Builds an importer bound to one reconciliation source/provider pair.
#
# @param source [#to_s] name of the feed the rows come from; must not be blank
# @param imported_at [Time, nil] timestamp to stamp on rows (stored as given)
# @param provider [#to_s] provider name; must not be blank
# @param window [Range, nil] optional date window, normalized by coerce_window
# @param strict_metadata [Boolean, nil] when nil the mode is derived from the
#   source: sources in FORGIVING_METADATA_SOURCES are lenient, others strict
# @param cursor [Object, nil] opaque cursor, stored as given
# @raise [ArgumentError] when source or provider is blank
def initialize(source:, imported_at:, provider:, window: nil, strict_metadata: nil, cursor: nil)
  @source = source.to_s
  @provider = provider.to_s
  # Validate before any other coercion so a nil source surfaces as the
  # ArgumentError below instead of a NoMethodError from nil.to_sym.
  raise ArgumentError, "source must be present" if @source.empty?
  raise ArgumentError, "provider must be present" if @provider.empty?

  @imported_at = imported_at
  @window = coerce_window(window)
  @cursor = cursor
  @strict_metadata = strict_metadata.nil? ? !FORGIVING_METADATA_SOURCES.include?(@source.to_sym) : strict_metadata
end
27
+
28
# Runs one import: checks the invoices table, records the attempt, upserts
# the rows and finalizes the attempt record.
#
# @param rows [Array, nil] raw rows to import
# @return [ImportResult] the empty result when there is nothing to do,
#   otherwise the import outcome carrying the tracking record id (if any)
# @raise [StandardError] any failure is re-raised after the tracking record
#   (when one was opened) has been marked as failed
def call(rows)
  import_record = nil
  ensure_reconciliation_installed!

  if skippable?(rows)
    ImportResult.empty
  else
    import_record = open_import_record
    outcome = perform_import(rows)
    complete_import_record(import_record, outcome)
    outcome.with(import_id: import_record&.id)
  end
rescue StandardError => e
  fail_import_record(import_record, e)
  raise
end
41
+
42
+ private
43
+
44
+ attr_reader :source, :provider, :imported_at, :window, :cursor, :strict_metadata
45
+
46
# True when there are no rows to import and no cursor to record.
def skippable?(rows)
  no_rows = rows.nil? || rows.empty?
  no_rows && cursor.nil?
end
49
+
50
# Guards the import against a missing provider invoices table.
#
# @raise [Error] with the generator/migrate hint when the table is absent
def ensure_reconciliation_installed!
  unless ProviderInvoice.table_exists?
    raise Error,
          "llm_cost_tracker_provider_invoices table is missing; " \
          "run `rails generate llm_cost_tracker:reconciliation && rails db:migrate`"
  end
end
57
+
58
# Normalizes the rows and upserts them into the provider invoices table.
#
# Rows sharing an external id within the same batch are collapsed to the last
# occurrence before the upsert: sending the same conflict key twice in one
# statement is rejected by PostgreSQL, and counting both copies would inflate
# the inserted/updated totals. Collapsed copies are reported as skipped.
#
# @param rows [Array, nil] raw rows
# @return [ImportResult] counts of inserted, updated and skipped rows plus the
#   per-row error messages; import_id is always nil here
def perform_import(rows)
  return ImportResult.empty if rows.nil? || rows.empty?

  normalized, errors = normalize_rows(rows)
  if normalized.empty?
    return ImportResult.new(inserted: 0, updated: 0, skipped: rows.size, errors: errors,
                            import_id: nil)
  end

  # Hash insertion order keeps the first position while the value is replaced,
  # so the payload order stays stable and the last duplicate wins.
  unique_rows = normalized.each_with_object({}) { |row, by_id| by_id[row[:external_id]] = row }.values

  existing = existing_external_ids(unique_rows.map { |row| row[:external_id] })
  rows_payload = unique_rows.map { |row| persistable_attributes(row) }
  upsert_options = { record_timestamps: true }
  upsert_options[:unique_by] = :external_id if ProviderInvoice.connection.supports_insert_conflict_target?
  ProviderInvoice.upsert_all(rows_payload, **upsert_options)

  inserted = unique_rows.count { |row| !existing.include?(row[:external_id]) }
  updated = unique_rows.size - inserted
  ImportResult.new(inserted: inserted, updated: updated, skipped: rows.size - unique_rows.size,
                   errors: errors, import_id: nil)
end
78
+
79
# Creates the tracking row for this import attempt in the running state.
#
# @return [ProviderInvoiceImport, nil] nil when the tracking table is absent
def open_import_record
  if tracking_table_present?
    attributes = {
      source: source,
      cursor: cursor,
      window_start: window&.first,
      window_end: window&.last,
      state: ProviderInvoiceImport::STATE_RUNNING,
      started_at: Time.now.utc
    }
    ProviderInvoiceImport.create!(attributes)
  end
end
91
+
92
# Writes the terminal state of a finished import onto its tracking row.
#
# @param record [ProviderInvoiceImport, nil] nothing happens when nil
# @param result [ImportResult] decides between the completed and failed
#   states via success? and supplies the row count and first error
def complete_import_record(record, result)
  if record
    final_state = if result.success?
                    ProviderInvoiceImport::STATE_COMPLETED
                  else
                    ProviderInvoiceImport::STATE_FAILED
                  end
    record.update!(
      state: final_state,
      rows_imported: result.total_imported,
      finished_at: Time.now.utc,
      last_error: result.errors.first
    )
  end
end
103
+
104
# Marks the tracking row as failed and stores "<ErrorClass>: <message>".
#
# @param record [ProviderInvoiceImport, nil] nothing happens when nil
# @param error [Exception] the failure that aborted the import
def fail_import_record(record, error)
  if record
    description = "#{error.class}: #{error.message}"
    record.update!(
      state: ProviderInvoiceImport::STATE_FAILED,
      last_error: description,
      finished_at: Time.now.utc
    )
  end
end
113
+
114
# Memoized check for the import tracking table. `defined?` is used so that a
# cached false is not re-queried on later calls.
def tracking_table_present?
  return @tracking_table_present if defined?(@tracking_table_present)

  @tracking_table_present = ProviderInvoiceImport.table_exists?
end
118
+
119
# Validates and normalizes raw rows one by one.
#
# A row is dropped with a "row <index>: ..." error when a required field is
# absent, nil or blank, or when normalizing it raises ArgumentError (invalid
# dates, rejected metadata). Rows outside the configured window are dropped
# without an error.
#
# @param rows [Array] raw rows (hash-like)
# @return [Array(Array<Hash>, Array<String>)] normalized rows and error messages
def normalize_rows(rows)
  errors = []
  normalized = rows.each_with_index.filter_map do |row, index|
    attrs = symbolize(row)
    # A key that is present but nil/blank is as unusable as an absent one;
    # without this check a nil external_id would be persisted as the bare
    # "scope:" prefix produced by namespaced_external_id.
    missing = REQUIRED_FIELDS.select do |field|
      value = attrs[field]
      value.nil? || value.to_s.strip.empty?
    end
    if missing.any?
      errors << "row #{index}: missing #{missing.join(', ')}"
      next
    end
    period_start = parse_date(attrs[:period_start])
    period_end = parse_date(attrs[:period_end])
    next unless within_window?(period_start, period_end)

    attrs.merge(
      external_id: namespaced_external_id(attrs[:external_id]),
      period_start: period_start,
      period_end: period_end,
      metadata: parse_metadata(attrs[:metadata])
    )
  rescue ArgumentError => e
    errors << "row #{index}: #{e.message}"
    nil
  end
  [normalized, errors]
end
144
+
145
# True when no window is configured, or when the row's period overlaps the
# window (both ends inclusive).
def within_window?(period_start, period_end)
  bounds = window
  bounds.nil? || (period_start <= bounds.last && period_end >= bounds.first)
end
150
+
151
# Normalizes the optional import window into a Range of Date objects.
#
# @param window [Range, nil] range whose endpoints parse_date accepts
# @return [Range, nil] nil when no window was given
# @raise [ArgumentError] when window is not a Range, is beginless/endless,
#   or an endpoint cannot be parsed as a date
def coerce_window(window)
  return nil if window.nil?
  raise ArgumentError, "window must be a Range of dates" unless window.is_a?(Range)

  lower = window.begin
  upper = window.end
  # Range#first / Range#last raise RangeError on open-ended ranges; report the
  # problem as the same ArgumentError family used for every other bad input.
  raise ArgumentError, "window must be a bounded Range of dates" if lower.nil? || upper.nil?

  Range.new(parse_date(lower), parse_date(upper))
end
157
+
158
# Looks up which of the given external ids are already stored.
#
# @param external_ids [Array<String>]
# @return [Set<String>] the subset found in the provider invoices table
def existing_external_ids(external_ids)
  matching = ProviderInvoice.where(external_id: external_ids)
  matching.pluck(:external_id).to_set
end
161
+
162
# Maps a normalized row onto the attributes written to the invoices table.
#
# Blank strings are treated like missing values: a blank billed_amount is
# stored as nil (BigDecimal("") would raise and abort the whole batch) and a
# blank currency falls back to the ledger default instead of being stored as
# an empty string.
#
# @param row [Hash] normalized row with symbol keys
# @return [Hash] attributes for the upsert
# @raise [ArgumentError] when billed_amount is present but not numeric
def persistable_attributes(row)
  raw_amount = row[:billed_amount]
  raw_amount = nil if raw_amount.is_a?(String) && raw_amount.strip.empty?
  currency = row[:currency].to_s.strip
  currency = Ledger::Rollups::DEFAULT_CURRENCY.to_s if currency.empty?

  {
    source: source,
    external_id: row[:external_id],
    period_start: row[:period_start],
    period_end: row[:period_end],
    billed_amount: raw_amount && BigDecimal(raw_amount.to_s.strip),
    currency: currency.upcase,
    metadata: stamp_metadata(row[:metadata]),
    imported_at: imported_at || Time.now.utc
  }
end
175
+
176
+ BASIS_DIMENSIONS_BY_PRIORITY = [
177
+ %w[project provider_project_id],
178
+ %w[api_key provider_api_key_id],
179
+ %w[workspace provider_workspace_id],
180
+ %w[model model]
181
+ ].freeze
182
+ private_constant :BASIS_DIMENSIONS_BY_PRIORITY
183
+
184
# Ensures the stored metadata carries a provider and then a match_basis.
def stamp_metadata(metadata)
  metadata_with_match_basis(metadata_with_provider(metadata))
end
188
+
189
# Adds the importer's provider under the "provider" key unless the metadata
# already names one (string or symbol key) with a non-empty String.
#
# @param metadata [Hash, nil]
# @return [Hash]
def metadata_with_provider(metadata)
  if metadata.nil? || metadata.empty?
    { "provider" => provider }
  else
    current = metadata["provider"] || metadata[:provider]
    already_named = current.is_a?(String) && !current.empty?
    already_named ? metadata : metadata.merge("provider" => provider)
  end
end
197
+
198
# Adds a "match_basis" unless the metadata already declares a non-empty one.
# The basis is the first entry of BASIS_DIMENSIONS_BY_PRIORITY whose
# dimension key (string or symbol) has a truthy value; "period_only" is used
# when none matches.
#
# @param metadata [Hash]
# @return [Hash]
def metadata_with_match_basis(metadata)
  declared = metadata["match_basis"] || metadata[:match_basis]
  return metadata if declared.is_a?(String) && !declared.empty?

  basis = "period_only"
  BASIS_DIMENSIONS_BY_PRIORITY.each do |name, key|
    next unless metadata[key] || metadata[key.to_sym]

    basis = name
    break
  end
  metadata.merge("match_basis" => basis)
end
207
+
208
# Prefixes an external id with "<scope>:" unless it already starts with it.
# The scope is the source alone when source and provider are equal, otherwise
# "<source>/<provider>".
def namespaced_external_id(external_id)
  scope = source == provider ? source : "#{source}/#{provider}"
  prefix = "#{scope}:"
  id = external_id.to_s
  return id if id.start_with?(prefix)

  prefix + id
end
214
+
215
# Returns the row with every key converted to a Symbol. A Hash whose keys are
# all Symbols already is returned as-is (same object).
def symbolize(row)
  already_symbolized = row.is_a?(Hash) && row.keys.all?(Symbol)
  return row if already_symbolized

  row.to_h.each_with_object({}) { |(key, value), out| out[key.to_s.to_sym] = value }
end
220
+
221
# Returns Date values unchanged and parses anything else from its string form.
#
# @raise [ArgumentError] (via Date.parse) when the value is not a valid date
def parse_date(value)
  value.is_a?(Date) ? value : Date.parse(value.to_s)
end
226
+
227
# Parses row metadata and, in strict mode only, checks the envelope keys.
#
# @return [Hash] the parsed metadata
def parse_metadata(metadata)
  parse_metadata_payload(metadata).tap do |payload|
    validate_envelope!(payload) if strict_metadata
  end
end
232
+
233
# Turns raw row metadata into a Hash.
#
# nil becomes {}, a Hash is returned as given, anything else is parsed as
# JSON from its string form. Input that is not valid JSON, or that is valid
# JSON but not an object (array, number, string, ...), cannot serve as
# metadata: in strict mode it raises, otherwise it is replaced by {}.
#
# @param metadata [Hash, String, nil]
# @return [Hash]
# @raise [ArgumentError] in strict mode for invalid or non-object JSON
def parse_metadata_payload(metadata)
  return {} if metadata.nil?
  return metadata if metadata.is_a?(Hash)

  parsed = JSON.parse(metadata.to_s)
  return parsed if parsed.is_a?(Hash)

  # Downstream code calls Hash methods on the result; reject other JSON
  # values here so the failure is a per-row ArgumentError.
  raise ArgumentError, "metadata must be a JSON object" if strict_metadata

  {}
rescue JSON::ParserError => e
  raise ArgumentError, "invalid metadata JSON: #{e.message}" if strict_metadata

  {}
end
243
+
244
# Checks that the metadata contains every key listed in ENVELOPE_KEYS
# (string or symbol form).
#
# @raise [ArgumentError] naming the absent keys
def validate_envelope!(metadata)
  present = metadata.keys.map(&:to_s)
  absent = ENVELOPE_KEYS.reject { |key| present.include?(key) }
  raise ArgumentError, "metadata missing envelope keys: #{absent.join(', ')}" unless absent.empty?
end
251
+ end
252
+ end
253
+ end
@@ -0,0 +1,171 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bigdecimal"
4
+ require "json"
5
+ require "time"
6
+
7
+ require_relative "fingerprint"
8
+
9
+ module LlmCostTracker
10
+ module Reconciliation
11
+ module Sources
12
+ module AnthropicUsage
13
+ FINGERPRINT_KEYS = %i[
14
+ starting_at ending_at model workspace_id
15
+ service_tier context_window cost_type token_type description
16
+ inference_geo
17
+ ].freeze
18
+ ROW_TYPE_COST = "cost"
19
+ AUTHORITY_COST_API = "cost_api"
20
+ DEFAULT_METER = "tokens"
21
+ DATA_RESIDENCY_GEOS = %w[us].freeze
22
+ private_constant :DATA_RESIDENCY_GEOS
23
+
24
+ module_function
25
+
26
# Converts an Anthropic usage/cost payload into importer rows.
#
# @param response [Hash, String, nil] decoded payload or its JSON text
# @param authority [String] value stored under the "authority" metadata key
# @param row_type [String] value stored under the "row_type" metadata key
# @return [Array<Hash>] one row per result of every bucket under :data
def parse(response, authority: AUTHORITY_COST_API, row_type: ROW_TYPE_COST)
  rows = []
  Array(coerce_hash(response)[:data]).each do |bucket|
    rows.concat(rows_for_bucket(bucket, authority: authority, row_type: row_type))
  end
  rows.compact
end
33
+
34
# Builds the rows for one time bucket. A bucket without both :starting_at and
# :ending_at yields no rows, and so does a bucket whose processing raises
# ArgumentError.
#
# @return [Array<Hash>]
def rows_for_bucket(bucket, authority:, row_type:)
  fields = symbolize(bucket)
  starting_at = fields[:starting_at]
  ending_at = fields[:ending_at]
  return [] if !starting_at || !ending_at

  period_start = parse_date(starting_at)
  period_end = end_inclusive_date(ending_at)

  Array(fields[:results]).each_with_object([]) do |raw, rows|
    row = row_for_result(raw,
                         period_start: period_start, period_end: period_end,
                         starting_at: starting_at, ending_at: ending_at,
                         authority: authority, row_type: row_type)
    rows << row if row
  end
rescue ArgumentError
  []
end
52
+
53
# Builds one importer row from a single bucket result.
#
# @return [Hash, nil] nil when the result has no :amount
def row_for_result(raw, period_start:, period_end:, starting_at:, ending_at:, authority:, row_type:)
  attrs = symbolize(raw)
  cents = attrs[:amount]
  return nil if cents.nil?

  digest = fingerprint_for(attrs, starting_at: starting_at, ending_at: ending_at)
  row = { external_id: "cost-#{digest}", period_start: period_start, period_end: period_end }
  row[:billed_amount] = dollars_from_cents(cents)
  row[:currency] = (attrs[:currency] || "USD").to_s.upcase
  row[:metadata] = metadata_for(attrs, authority: authority, row_type: row_type)
  row
end
68
+
69
# Divides an amount by 100 and renders it as a plain (non-scientific)
# decimal string.
#
# @raise [ArgumentError] when the amount is not a valid decimal
def dollars_from_cents(amount)
  cents = BigDecimal(amount.to_s)
  dollars = cents / 100
  dollars.to_s("F")
end
72
+
73
# Assembles the metadata envelope for one result; entries whose value is nil
# are left out.
#
# @return [Hash{String=>Object}]
def metadata_for(result, authority:, row_type:)
  entries = [
    ["row_type", row_type],
    ["meter", meter_for(result)],
    ["authority", authority],
    ["match_basis", match_basis_for(result)],
    ["model", result[:model]],
    ["pricing_mode", pricing_mode_for(result)],
    ["context_window", result[:context_window]],
    ["cost_type", result[:cost_type]],
    ["description", result[:description]],
    ["token_type", result[:token_type]],
    ["inference_geo", result[:inference_geo]],
    ["provider_workspace_id", result[:workspace_id]]
  ]
  entries.reject { |_key, value| value.nil? }.to_h
end
89
+
90
# Maps a result's :cost_type to a meter name. "tokens" is refined through
# token_meter; unrecognized cost types use DEFAULT_METER.
def meter_for(result)
  cost_type = result[:cost_type].to_s
  return token_meter(result[:token_type].to_s) if cost_type == "tokens"

  fixed_meters = {
    "web_search" => "web_search",
    "code_execution" => "code_execution_hour",
    "session_usage" => "session_usage"
  }
  fixed_meters.fetch(cost_type, DEFAULT_METER)
end
99
+
100
# Picks the token meter from substrings of the token type. The cache markers
# are checked before the generic "input"/"output" ones, so a cache token type
# that also contains "input" maps to its cache meter.
def token_meter(token_type)
  markers = [
    %w[cache_read cache_read_input_tokens],
    %w[cache_creation cache_creation_input_tokens],
    %w[input input_tokens],
    %w[output output_tokens]
  ]
  hit = markers.find { |needle, _meter| token_type.include?(needle) }
  hit ? hit.last : DEFAULT_METER
end
108
+
109
# Derives the pricing mode label: "batch" when :service_tier is batch,
# "data_residency" when :inference_geo is listed in DATA_RESIDENCY_GEOS, both
# joined with "_" when both apply, nil when neither does. Comparisons ignore
# case.
def pricing_mode_for(result)
  batch = result[:service_tier].to_s.downcase == "batch"
  resident = DATA_RESIDENCY_GEOS.include?(result[:inference_geo].to_s.downcase)
  labels = [("batch" if batch), ("data_residency" if resident)].compact
  labels.empty? ? nil : labels.uniq.join("_")
end
115
+
116
# Chooses the match basis: workspace first, then model, else "period_only".
def match_basis_for(result)
  if result[:workspace_id]
    "workspace"
  elsif result[:model]
    "model"
  else
    "period_only"
  end
end
122
+
123
# Computes the result's fingerprint over FINGERPRINT_KEYS, using the bucket
# bounds (normalized through normalized_epoch) as :starting_at / :ending_at.
def fingerprint_for(result, starting_at:, ending_at:)
  bounds = {
    starting_at: normalized_epoch(starting_at),
    ending_at: normalized_epoch(ending_at)
  }
  Fingerprint.compute(FINGERPRINT_KEYS, result.merge(bounds))
end
128
+
129
# Reduces a timestamp to integer epoch seconds: numerics are truncated,
# anything else is parsed as a time string. Values Time.parse rejects are
# returned as their string form.
def normalized_epoch(value)
  if value.is_a?(Numeric)
    value.to_i
  else
    Time.parse(value.to_s).utc.to_i
  end
rescue ArgumentError
  value.to_s
end
136
+
137
# Converts a bucket bound to a Date: Date values are returned unchanged,
# numerics are read as epoch seconds, anything else is parsed as a time
# string. Non-Date inputs are converted in UTC.
def parse_date(value)
  case value
  when Date then value
  when Numeric then Time.at(value).utc.to_date
  else Time.parse(value.to_s).utc.to_date
  end
end
143
+
144
# Converts an exclusive end bound into the last calendar day it covers by
# stepping back one second and taking the UTC date.
#
# Plain Date values are anchored at UTC midnight. Date#to_time yields local
# midnight, which made the result depend on the process time zone: west of
# UTC the same day came back instead of the previous one.
#
# @param value [Numeric, Date, DateTime, #to_s] epoch seconds, a date, or a
#   time string
# @return [Date]
# @raise [ArgumentError] when a string value cannot be parsed as a time
def end_inclusive_date(value)
  time = case value
         when Numeric then Time.at(value).utc
         when DateTime then value.to_time.utc # carries its own offset
         when Date then Time.utc(value.year, value.month, value.day)
         else Time.parse(value.to_s).utc
         end
  (time - 1).utc.to_date
end
152
+
153
# Normalizes the payload into a Hash with symbol keys (top level only).
#
# @param response [Hash, String, nil] nil yields {}; non-Hash input is parsed
#   as JSON from its string form
# @raise [ArgumentError] when the JSON is invalid or is not an object
def coerce_hash(response)
  case response
  when nil then {}
  when Hash then symbolize(response)
  else
    decoded = JSON.parse(response.to_s)
    raise ArgumentError, "Anthropic Usage payload must be a JSON object" unless decoded.is_a?(Hash)

    symbolize(decoded)
  end
rescue JSON::ParserError => e
  raise ArgumentError, "Unable to parse Anthropic Usage payload: #{e.message}"
end
164
+
165
# Returns a new Hash whose top-level keys are Symbols.
def symbolize(hash)
  hash.to_h.each_with_object({}) do |(key, value), out|
    out[key.to_s.to_sym] = value
  end
end
168
+ end
169
+ end
170
+ end
171
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+
5
+ module LlmCostTracker
6
+ module Reconciliation
7
+ module Sources
8
# Short, deterministic identifiers for provider cost rows.
module Fingerprint
  # Number of leading hex characters kept from the SHA-256 digest.
  DIGEST_LENGTH = 16

  module_function

  # Hashes the string forms of the selected attributes, joined with "|" in
  # the order of +keys+.
  #
  # @param keys [Array<Symbol>] attribute names, in fingerprint order
  # @param attributes [Hash] values looked up by those keys (absent => "")
  # @return [String] the first DIGEST_LENGTH hex characters of the digest
  def compute(keys, attributes)
    parts = keys.map { |key| attributes[key].to_s }
    Digest::SHA256.hexdigest(parts.join("|")).slice(0, DIGEST_LENGTH)
  end
end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,142 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "time"
5
+
6
+ require_relative "fingerprint"
7
+
8
+ module LlmCostTracker
9
+ module Reconciliation
10
+ module Sources
11
+ module OpenaiUsage
12
+ FINGERPRINT_KEYS = %i[start_time end_time line_item model project_id api_key_id organization_id].freeze
13
+ ROW_TYPE_COST = "cost"
14
+ AUTHORITY_COST_API = "cost_api"
15
+ DEFAULT_METER = "tokens"
16
+
17
+ module_function
18
+
19
# Converts an OpenAI costs payload into importer rows.
#
# @param response [Hash, String, nil] decoded payload or its JSON text
# @param authority [String] value stored under the "authority" metadata key
# @param row_type [String] value stored under the "row_type" metadata key
# @return [Array<Hash>] one row per result of every bucket under :data
def parse(response, authority: AUTHORITY_COST_API, row_type: ROW_TYPE_COST)
  rows = []
  Array(coerce_hash(response)[:data]).each do |bucket|
    rows.concat(rows_for_bucket(bucket, authority: authority, row_type: row_type))
  end
  rows.compact
end
26
+
27
# Builds the rows for one time bucket. A bucket without both :start_time and
# :end_time yields no rows, and so does a bucket whose processing raises
# ArgumentError.
#
# @return [Array<Hash>]
def rows_for_bucket(bucket, authority:, row_type:)
  fields = symbolize(bucket)
  start_time = fields[:start_time]
  end_time = fields[:end_time]
  return [] if !start_time || !end_time

  period_start = epoch_to_date(start_time)
  period_end = end_inclusive_date(end_time)

  Array(fields[:results]).each_with_object([]) do |raw, rows|
    row = row_for_result(raw,
                         period_start: period_start, period_end: period_end,
                         start_time: start_time, end_time: end_time,
                         authority: authority, row_type: row_type)
    rows << row if row
  end
rescue ArgumentError
  []
end
45
+
46
# Builds one importer row from a single bucket result.
#
# @return [Hash, nil] nil when the result's :amount has no :value
def row_for_result(raw, period_start:, period_end:, start_time:, end_time:, authority:, row_type:)
  entry = symbolize(raw)
  money = symbolize(entry[:amount] || {})
  value = money[:value]
  return nil if value.nil?

  digest = fingerprint_for(entry, start_time: start_time, end_time: end_time)
  row = { external_id: "cost-#{digest}", period_start: period_start, period_end: period_end }
  row[:billed_amount] = value
  row[:currency] = (money[:currency] || "USD").to_s.upcase
  row[:metadata] = metadata_for(entry, authority: authority, row_type: row_type)
  row
end
62
+
63
# Assembles the metadata envelope for one result; entries whose value is nil
# are left out. The result's :organization_id is stored under
# "provider_workspace_id".
#
# @return [Hash{String=>Object}]
def metadata_for(result, authority:, row_type:)
  entries = [
    ["row_type", row_type],
    ["meter", meter_for(result)],
    ["authority", authority],
    ["match_basis", match_basis_for(result)],
    ["line_item", result[:line_item]],
    ["model", result[:model]],
    ["provider_project_id", result[:project_id]],
    ["provider_api_key_id", result[:api_key_id]],
    ["provider_workspace_id", result[:organization_id]]
  ]
  entries.reject { |_key, value| value.nil? }.to_h
end
76
+
77
# Maps the result's :line_item text (case-insensitive substring match) to a
# meter name; unrecognized line items use DEFAULT_METER.
def meter_for(result)
  label = result[:line_item].to_s.downcase
  if label.include?("web search") || label.include?("search content")
    "web_search"
  elsif label.include?("file search")
    "file_search_storage"
  elsif label.include?("code interpreter") || label.include?("container")
    "container_session"
  else
    DEFAULT_METER
  end
end
86
+
87
# Chooses the match basis: project first, then API key, then model, else
# "period_only".
def match_basis_for(result)
  if result[:project_id]
    "project"
  elsif result[:api_key_id]
    "api_key"
  elsif result[:model]
    "model"
  else
    "period_only"
  end
end
94
+
95
# Computes the result's fingerprint over FINGERPRINT_KEYS, using the bucket
# bounds (normalized through normalized_epoch) as :start_time / :end_time.
def fingerprint_for(result, start_time:, end_time:)
  bounds = {
    start_time: normalized_epoch(start_time),
    end_time: normalized_epoch(end_time)
  }
  Fingerprint.compute(FINGERPRINT_KEYS, result.merge(bounds))
end
100
+
101
# Reduces a timestamp to integer epoch seconds for fingerprinting.
#
# Numerics are truncated; digit-only strings are read as epoch seconds, the
# same rule epoch_to_date and end_inclusive_date apply, so "1712123015" and
# 1712123015 give the same fingerprint. Handing such strings to Time.parse
# could read them as a calendar date in the local zone instead. Other values
# are parsed as time strings; if that fails the string form is returned.
#
# @param value [Numeric, #to_s]
# @return [Integer, String]
def normalized_epoch(value)
  return value.to_i if value.is_a?(Numeric)

  text = value.to_s
  # Base 10 is explicit so a leading zero is not interpreted as octal.
  return Integer(text, 10) if text.match?(/\A\d+\z/)

  Time.parse(text).utc.to_i
rescue ArgumentError
  value.to_s
end
108
+
109
# Converts a bucket bound to its UTC calendar date. Numerics and digit-only
# strings are read as epoch seconds; anything else is parsed as a time string.
def epoch_to_date(value)
  epoch_like = value.is_a?(Numeric) || value.to_s.match?(/\A\d+\z/)
  moment = epoch_like ? Time.at(Integer(value)) : Time.parse(value.to_s)
  moment.utc.to_date
end
114
+
115
# Converts an exclusive end bound into the last UTC calendar day it covers by
# stepping back one second. Numerics and digit-only strings are read as epoch
# seconds; anything else is parsed as a time string.
def end_inclusive_date(value)
  epoch_like = value.is_a?(Numeric) || value.to_s.match?(/\A\d+\z/)
  boundary = epoch_like ? Time.at(Integer(value)).utc : Time.parse(value.to_s).utc
  (boundary - 1).utc.to_date
end
123
+
124
# Normalizes the payload into a Hash with symbol keys (top level only).
#
# @param response [Hash, String, nil] nil yields {}; non-Hash input is parsed
#   as JSON from its string form
# @raise [ArgumentError] when the JSON is invalid or is not an object
def coerce_hash(response)
  case response
  when nil then {}
  when Hash then symbolize(response)
  else
    decoded = JSON.parse(response.to_s)
    raise ArgumentError, "OpenAI Costs payload must be a JSON object" unless decoded.is_a?(Hash)

    symbolize(decoded)
  end
rescue JSON::ParserError => e
  raise ArgumentError, "Unable to parse OpenAI Costs payload: #{e.message}"
end
135
+
136
# Returns a new Hash whose top-level keys are Symbols.
def symbolize(hash)
  hash.to_h.each_with_object({}) do |(key, value), out|
    out[key.to_s.to_sym] = value
  end
end
139
+ end
140
+ end
141
+ end
142
+ end
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ require_relative "ledger/schema/provider_invoices"
6
+ require_relative "ledger/schema/provider_invoice_imports"
7
+ require_relative "reconciliation/import_result"
8
+ require_relative "reconciliation/importer"
9
+ require_relative "reconciliation/diff_result"
10
+ require_relative "reconciliation/diff"
11
+ require_relative "reconciliation/sources/fingerprint"
12
+ require_relative "reconciliation/sources/openai_usage"
13
+ require_relative "reconciliation/sources/anthropic_usage"
14
+
15
+ module LlmCostTracker
16
+ module Reconciliation
17
+ SUPPORTED_SOURCES = %i[openai anthropic gemini csv].freeze
18
+ DEFAULT_THRESHOLD_PERCENT = 5.0
19
+ INVOICE_FRESHNESS_DAYS = 14
20
+ SOURCE_TO_PROVIDER = {
21
+ "openai" => "openai",
22
+ "openai_usage" => "openai",
23
+ "anthropic" => "anthropic",
24
+ "anthropic_usage" => "anthropic",
25
+ "gemini" => "gemini"
26
+ }.freeze
27
+
28
+ SCHEMA_TABLES = {
29
+ Ledger::Schema::ProviderInvoices => "llm_cost_tracker_provider_invoices",
30
+ Ledger::Schema::ProviderInvoiceImports => "llm_cost_tracker_provider_invoice_imports"
31
+ }.freeze
32
+
33
+ class << self
34
# Imports provider invoice rows for a source.
#
# Checks that reconciliation is enabled and the source is present, resolves
# the provider, then hands the rows to an Importer.
#
# @param source [#to_s] reconciliation source name
# @param rows [Array, nil] rows to import
# @param provider [#to_s, nil] explicit provider; resolved from the source
#   when nil
# @param imported_at, window, strict_metadata, cursor passed through to the
#   Importer unchanged
# @return whatever Importer#call returns
def import(source:, rows:, provider: nil, imported_at: nil, window: nil,
           strict_metadata: nil, cursor: nil)
  ensure_enabled!
  ensure_source_present!(source)

  resolved_provider = resolve_provider(source: source, provider: provider)
  importer = Importer.new(
    source: source,
    provider: resolved_provider,
    imported_at: imported_at,
    window: window,
    strict_metadata: strict_metadata,
    cursor: cursor
  )
  importer.call(rows)
end
47
+
48
# Runs a reconciliation diff for a source and period.
#
# Checks that reconciliation is enabled and the source is present, resolves
# the provider, then runs a Diff with the given arguments.
#
# @param source [#to_s] reconciliation source name
# @param period_start, period_end period bounds, passed through to Diff
# @param provider [#to_s, nil] explicit provider; resolved from the source
#   when nil
# @param scope [Hash] passed through to Diff
# @param currency passed through to Diff
# @param drilldown_limit passed through to Diff
# @return whatever Diff#call returns
def diff(source:, period_start:, period_end:, provider: nil, scope: {}, currency: nil,
         drilldown_limit: Diff::DEFAULT_DRILLDOWN_LIMIT)
  ensure_enabled!
  ensure_source_present!(source)

  resolved_provider = resolve_provider(source: source, provider: provider)
  comparison = Diff.new(
    source: source,
    provider: resolved_provider,
    period_start: period_start,
    period_end: period_end,
    scope: scope,
    currency: currency,
    drilldown_limit: drilldown_limit
  )
  comparison.call
end
62
+
63
# @raise [ArgumentError] when the source's string form is empty
def ensure_source_present!(source)
  raise ArgumentError, "source must be present" if source.to_s.empty?
end
68
+
69
# Determines the provider for a source. Order of precedence: the explicit
# argument, the SOURCE_TO_PROVIDER mapping, then the provider recorded on
# previously imported invoices of that source.
#
# @return [String]
# @raise [ArgumentError] when none of the three yields a provider
def resolve_provider(source:, provider:)
  return provider.to_s if provider

  resolved = SOURCE_TO_PROVIDER[source.to_s] || recorded_provider_for(source)
  return resolved if resolved

  known = SOURCE_TO_PROVIDER.keys.join(", ")
  raise ArgumentError,
        "provider: must be specified for reconciliation source #{source.inspect}; " \
        "sources with a default provider mapping: #{known}"
end
83
+
84
# Reads the provider stored in the metadata of the most recently imported
# invoice of the given source.
#
# @return [String, nil] nil when the invoices table is missing, no invoice
#   exists, or its metadata holds no non-empty String provider
def recorded_provider_for(source)
  invoices = LlmCostTracker::ProviderInvoice
  return nil unless invoices.table_exists?

  latest_metadata = invoices
                    .where(source: source.to_s)
                    .order(imported_at: :desc)
                    .limit(1)
                    .pick(:metadata)
  candidate = metadata_provider_value(latest_metadata)
  candidate.is_a?(String) && !candidate.empty? ? candidate : nil
end
95
+
96
# Extracts the "provider" entry from invoice metadata that is either a Hash
# or a JSON string.
#
# @param metadata [Hash, String, nil]
# @return [Object, nil] nil for any other input, for text that cannot be
#   parsed as JSON, or for JSON that is not an object
def metadata_provider_value(metadata)
  case metadata
  when Hash then metadata["provider"]
  when String
    parsed = begin
      JSON.parse(metadata)
    rescue JSON::ParserError, EncodingError
      # Best-effort lookup: undecodable text just means "no recorded
      # provider". Only parse/encoding failures are swallowed; anything else
      # propagates instead of being hidden by a rescue modifier.
      nil
    end
    parsed.is_a?(Hash) ? parsed["provider"] : nil
  end
end
104
+
105
# Returns the configuration's reconciliation_enabled setting.
def enabled?
  settings = LlmCostTracker.configuration
  settings.reconciliation_enabled
end
108
+
109
# @raise [Error] with configuration instructions when reconciliation is
#   disabled
def ensure_enabled!
  unless enabled?
    raise Error,
          "reconciliation is disabled; set `config.reconciliation_enabled = true` in your initializer " \
          "(requires admin/org-level provider API keys; see docs/upgrading.md)"
  end
end
116
+ end
117
+ end
118
+ end