llm_cost_tracker 0.3.3 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +31 -0
  3. data/README.md +46 -25
  4. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +96 -23
  5. data/app/services/llm_cost_tracker/dashboard/data_quality_aggregate.rb +81 -0
  6. data/app/views/llm_cost_tracker/data_quality/index.html.erb +65 -0
  7. data/lib/llm_cost_tracker/budget.rb +73 -22
  8. data/lib/llm_cost_tracker/configuration.rb +4 -0
  9. data/lib/llm_cost_tracker/cost.rb +1 -2
  10. data/lib/llm_cost_tracker/errors.rb +22 -3
  11. data/lib/llm_cost_tracker/event.rb +4 -0
  12. data/lib/llm_cost_tracker/event_metadata.rb +21 -15
  13. data/lib/llm_cost_tracker/generators/llm_cost_tracker/{add_monthly_totals_generator.rb → add_period_totals_generator.rb} +4 -4
  14. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_usage_breakdown_generator.rb +29 -0
  15. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +96 -0
  16. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_usage_breakdown_to_llm_api_calls.rb.erb +29 -0
  17. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +11 -5
  18. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +3 -1
  19. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +11 -3
  20. data/lib/llm_cost_tracker/parsed_usage.rb +16 -7
  21. data/lib/llm_cost_tracker/parsers/anthropic.rb +24 -55
  22. data/lib/llm_cost_tracker/parsers/base.rb +80 -0
  23. data/lib/llm_cost_tracker/parsers/gemini.rb +17 -37
  24. data/lib/llm_cost_tracker/parsers/openai.rb +1 -6
  25. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +6 -15
  26. data/lib/llm_cost_tracker/parsers/openai_usage.rb +25 -34
  27. data/lib/llm_cost_tracker/parsers/registry.rb +17 -2
  28. data/lib/llm_cost_tracker/period_total.rb +9 -0
  29. data/lib/llm_cost_tracker/price_registry.rb +14 -4
  30. data/lib/llm_cost_tracker/price_sync/merger.rb +1 -1
  31. data/lib/llm_cost_tracker/price_sync/raw_price.rb +3 -5
  32. data/lib/llm_cost_tracker/price_sync/sources/litellm.rb +2 -3
  33. data/lib/llm_cost_tracker/price_sync/sources/open_router.rb +2 -3
  34. data/lib/llm_cost_tracker/prices.json +30 -30
  35. data/lib/llm_cost_tracker/pricing.rb +44 -32
  36. data/lib/llm_cost_tracker/railtie.rb +2 -1
  37. data/lib/llm_cost_tracker/storage/active_record_rollups.rb +142 -0
  38. data/lib/llm_cost_tracker/storage/active_record_store.rb +35 -78
  39. data/lib/llm_cost_tracker/stream_collector.rb +4 -2
  40. data/lib/llm_cost_tracker/tags_column.rb +71 -14
  41. data/lib/llm_cost_tracker/tracker.rb +54 -32
  42. data/lib/llm_cost_tracker/usage_breakdown.rb +30 -0
  43. data/lib/llm_cost_tracker/version.rb +1 -1
  44. data/lib/llm_cost_tracker.rb +10 -3
  45. metadata +9 -4
  46. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_monthly_totals_to_llm_cost_tracker.rb.erb +0 -48
  47. data/lib/llm_cost_tracker/monthly_total.rb +0 -9
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b966913d302d5c5c3466615d1fa3983855c241f6cd9e3e26558c0fcc5fc4e7d5
4
- data.tar.gz: 52804e702d5f01e5a4d247e8b50e601dede2b328bd7075c68ffd5f472b3b0d58
3
+ metadata.gz: d2cdd5f30c6fbd8c0168549b0853e9d8bc54586e60921733ce11a89a1d86078c
4
+ data.tar.gz: c91384579df6acdeb04d24b62f8bf916040f98156fd2bc882c94afc534f7dba5
5
5
  SHA512:
6
- metadata.gz: 609ba1a18be86dce0b567b2ea33b3f3123da88683f0c65d9aef780f2e4854d1dde6686adfa505fc154d13da6dd6cb2b31d9f38c303de5fb22f6fda65c7f44aa7
7
- data.tar.gz: de372e0940b4cfc400dacfc6dbf9e00f256c6944209da8cceaadd20a318b8c7aa8982d5190e21d38640ad20d04cc86400a4e872ec640189796e045acf1f7dfad
6
+ metadata.gz: 88d61d6714101ee9e8162814f5527bde487eced83663d86a1f938b77bcee1e4fcb4db2c4dde763720a828368a779a8677da57ecf10b2fadec78a959e6fdce6a7
7
+ data.tar.gz: d2d2bb097058507c06c1ea330a0c7d5a63d2e92b824fd8e4e7640a052dff100038eadb2e097edb4500457c382368080414c5261a2cc0a4f69436ae2234fd420d
data/CHANGELOG.md CHANGED
@@ -4,6 +4,37 @@ Format: [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). Versioning: [S
4
4
 
5
5
  ## [Unreleased]
6
6
 
7
+ ## [0.4.1] - 2026-04-24
8
+
9
+ ### Changed
10
+
11
+ - Batched ActiveRecord period rollup writes and budget total reads.
12
+ - Memoized schema capability checks and refreshed them on `reset_column_information`.
13
+ - Install migration adds `[:model, :tracked_at]` composite index and drops redundant single-column `:provider` / `:model` indexes.
14
+ - Data Quality now reads counters and usage sums through one aggregate query.
15
+ - Parser URL matching, stream-event extraction, and custom parser registration now share a smaller base/registry extension surface.
16
+ - Added cookbook recipes for `ruby-openai`, `anthropic-sdk-ruby`, `gemini-ai`, `langchainrb`, Azure OpenAI, and LiteLLM proxy setups.
17
+
18
+ ### Fixed
19
+
20
+ - `llm_cost_tracker:add_period_totals` now imports legacy monthly rollups and backfills before adding the unique index.
21
+ - Budget docs now describe `:notify` across monthly, daily, and per-call budgets.
22
+
23
+ ## [0.4.0] - 2026-04-24
24
+
25
+ ### Changed
26
+
27
+ - BREAKING: Canonical usage and pricing now use `cache_read_input` / `cache_write_input` instead of `cached_input` / `cache_creation_input`.
28
+ - BREAKING: `Pricing.cost_for` now requires `provider:` and prefers provider-specific price entries before model-only entries.
29
+ - BREAKING: Fresh ActiveRecord installs include cache-read, cache-write, and hidden-output token/cost breakdown columns.
30
+ - BREAKING: ActiveRecord budget rollups now use `llm_cost_tracker_period_totals`.
31
+ - BREAKING: `llm_cost_tracker:add_monthly_totals` was replaced by `llm_cost_tracker:add_period_totals`.
32
+ - Added `llm_cost_tracker:add_usage_breakdown` generator for upgrading existing ActiveRecord installs.
33
+ - Added `llm_cost_tracker:add_period_totals` generator for upgrading existing ActiveRecord installs.
34
+ - Added generic `pricing_mode` support with mode-prefixed local price keys.
35
+ - Data Quality now shows usage bucket totals and hidden-output share.
36
+ - Added daily budget and per-call budget guardrails.
37
+
7
38
  ## [0.3.3] - 2026-04-24
8
39
 
9
40
  ### Added
data/README.md CHANGED
@@ -15,7 +15,7 @@ Every Rails app with LLM integrations eventually runs into the same question: wh
15
15
 
16
16
  ## What You Get
17
17
 
18
- - A local ActiveRecord ledger of provider, model, tokens, cost, latency, tags, streaming usage, and provider response IDs
18
+ - A local ActiveRecord ledger of provider, model, usage breakdown, cost, latency, tags, streaming usage, and provider response IDs
19
19
  - Faraday middleware plus explicit `track` / `track_stream` helpers for non-Faraday clients
20
20
  - Server-rendered Rails dashboard with overview, calls, tags, CSV export, and data-quality pages
21
21
  - Local pricing snapshots, price sync tasks, and budget guardrails
@@ -159,7 +159,9 @@ LlmCostTracker.track_stream(provider: "anthropic", model: "claude-sonnet-4-6") d
159
159
  end
160
160
  ```
161
161
 
162
- Run `bin/rails g llm_cost_tracker:add_streaming` once on existing installs to add the `stream` and `usage_source` columns. Run `bin/rails g llm_cost_tracker:add_provider_response_id` to persist provider-issued response IDs.
162
+ Run `bin/rails g llm_cost_tracker:add_streaming` once on existing installs to add the `stream` and `usage_source` columns. Run `bin/rails g llm_cost_tracker:add_provider_response_id` to persist provider-issued response IDs. Run `bin/rails g llm_cost_tracker:add_usage_breakdown` to add cache-read, cache-write, hidden-output, and pricing-mode columns.
163
+
164
+ More client-specific snippets live in [`docs/cookbook.md`](docs/cookbook.md).
163
165
 
164
166
  ### Manual tracking
165
167
 
@@ -176,6 +178,10 @@ LlmCostTracker.track(
176
178
  )
177
179
  ```
178
180
 
181
+ `input_tokens` is regular non-cache input. Put cache hits in
182
+ `cache_read_input_tokens` and cache writes in `cache_write_input_tokens`; total
183
+ tokens are calculated from the canonical billing breakdown.
184
+
179
185
  ## Configuration
180
186
 
181
187
  ```ruby
@@ -185,17 +191,19 @@ LlmCostTracker.configure do |config|
185
191
  config.default_tags = { app: "my_app", environment: Rails.env }
186
192
 
187
193
  config.monthly_budget = 500.00
194
+ config.daily_budget = 50.00
195
+ config.per_call_budget = 2.00
188
196
  config.budget_exceeded_behavior = :notify # :notify, :raise, :block_requests
189
197
  config.storage_error_behavior = :warn # :ignore, :warn, :raise
190
198
  config.unknown_pricing_behavior = :warn # :ignore, :warn, :raise
191
199
 
192
200
  config.on_budget_exceeded = ->(data) {
193
- SlackNotifier.notify("#alerts", "🚨 LLM budget $#{data[:monthly_total].round(2)} / $#{data[:budget]}")
201
+ SlackNotifier.notify("#alerts", "🚨 LLM #{data[:budget_type]} budget $#{data[:total].round(2)} / $#{data[:budget]}")
194
202
  }
195
203
 
196
204
  config.prices_file = Rails.root.join("config/llm_cost_tracker_prices.yml")
197
205
  config.pricing_overrides = {
198
- "ft:gpt-4o-mini:my-org" => { input: 0.30, cached_input: 0.15, output: 1.20 }
206
+ "ft:gpt-4o-mini:my-org" => { input: 0.30, cache_read_input: 0.15, output: 1.20 }
199
207
  }
200
208
 
201
209
  # Built-in: openrouter.ai, api.deepseek.com
@@ -203,7 +211,9 @@ LlmCostTracker.configure do |config|
203
211
  end
204
212
  ```
205
213
 
206
- Pricing is best effort. OpenRouter-style IDs like `openai/gpt-4o-mini` are normalized to built-in names when possible. Use `prices_file` / `pricing_overrides` for fine-tunes, gateway-specific IDs, enterprise discounts, batch pricing, or models the gem does not know.
214
+ Pricing is best effort. OpenRouter-style IDs like `openai/gpt-4o-mini` are normalized to built-in names when possible. Use `prices_file` / `pricing_overrides` for fine-tunes, gateway-specific IDs, enterprise discounts, alternate pricing modes, or models the gem does not know.
215
+ Provider-specific entries like `openai/gpt-4o-mini` win over model-only entries like `gpt-4o-mini`.
216
+ Pass `pricing_mode: :batch` to use optional mode-specific keys such as `batch_input` / `batch_output`; missing mode-specific keys fall back to standard `input` / `output` rates. The same pattern works for custom modes, for example `contract_input`.
207
217
 
208
218
  `storage_error_behavior = :warn` (default) lets LLM responses continue if storage fails; `:raise` exposes `StorageError#original_error`.
209
219
 
@@ -225,7 +235,7 @@ bin/rails generate llm_cost_tracker:prices
225
235
  {
226
236
  "metadata": { "updated_at": "2026-04-18", "currency": "USD", "unit": "1M tokens" },
227
237
  "models": {
228
- "my-gateway/gpt-4o-mini": { "input": 0.20, "cached_input": 0.10, "output": 0.80 }
238
+ "my-gateway/gpt-4o-mini": { "input": 0.20, "cache_read_input": 0.10, "output": 0.80, "batch_input": 0.10, "batch_output": 0.40 }
229
239
  }
230
240
  }
231
241
  ```
@@ -256,16 +266,22 @@ Large price changes are flagged during sync. If a specific entry is expected to
256
266
  ```ruby
257
267
  config.storage_backend = :active_record
258
268
  config.monthly_budget = 100.00
269
+ config.daily_budget = 10.00
270
+ config.per_call_budget = 1.00
259
271
  config.budget_exceeded_behavior = :block_requests
260
272
  ```
261
273
 
262
- - `:notify` — fire `on_budget_exceeded` after an event pushes the month over budget.
274
+ - `:notify` — fire `on_budget_exceeded` after an event pushes spend over the monthly, daily, or per-call budget.
263
275
  - `:raise` — record the event, then raise `BudgetExceededError`.
264
- - `:block_requests` — block preflight when the stored monthly total is already over budget; still raises post-response on the event that crosses the line. Needs `:active_record` storage.
276
+ - `:block_requests` — block preflight when the stored monthly or daily total is already over budget; still raises post-response on the event that crosses the line. Needs `:active_record` storage for preflight.
277
+
278
+ `monthly_budget` and `daily_budget` are cumulative ledger limits. `per_call_budget` is a ceiling for a single priced event and runs after the response cost is known.
279
+
280
+ ActiveRecord installs keep `llm_cost_tracker_period_totals` in sync with atomic upserts. Budget preflight reads period rollups instead of scanning `llm_api_calls`.
265
281
 
266
282
  ```ruby
267
283
  rescue LlmCostTracker::BudgetExceededError => e
268
- # e.monthly_total, e.budget, e.last_event
284
+ # e.budget_type, e.total, e.budget, e.monthly_total, e.daily_total, e.call_cost, e.last_event
269
285
  ```
270
286
 
271
287
  `:block_requests` is a **guardrail, not a hard cap**. The preflight and the spend-recording write are separate statements, so under Puma / Sidekiq concurrency multiple workers can all pass the preflight and then collectively overshoot the budget. The setting reliably *stops new requests after the overshoot is visible* — it does not prevent the overshoot itself. For strict quotas use a provider- or gateway-level limit, or a database-backed counter outside this gem.
@@ -343,7 +359,7 @@ On other adapters tags fall back to JSON in a text column. `by_tag` uses JSONB c
343
359
  Upgrade an existing install:
344
360
 
345
361
  ```bash
346
- bin/rails generate llm_cost_tracker:add_monthly_totals # shared monthly budget rollups
362
+ bin/rails generate llm_cost_tracker:add_period_totals # shared budget rollups
347
363
  bin/rails generate llm_cost_tracker:upgrade_tags_to_jsonb # PG: text → jsonb + GIN
348
364
  bin/rails generate llm_cost_tracker:upgrade_cost_precision # widen cost columns
349
365
  bin/rails generate llm_cost_tracker:add_latency_ms
@@ -403,12 +419,14 @@ ActiveSupport::Notifications.subscribe("llm_request.llm_cost_tracker") do |*, pa
403
419
  # payload =>
404
420
  # {
405
421
  # provider: "openai", model: "gpt-4o",
406
- # input_tokens: 150, output_tokens: 42, total_tokens: 192, latency_ms: 248,
422
+ # input_tokens: 150, cache_read_input_tokens: 0, cache_write_input_tokens: 0,
423
+ # hidden_output_tokens: 0, output_tokens: 42, total_tokens: 192, latency_ms: 248,
407
424
  # cost: {
408
- # input_cost: 0.000375, cached_input_cost: 0.0,
409
- # cache_read_input_cost: 0.0, cache_creation_input_cost: 0.0,
410
- # output_cost: 0.00042, total_cost: 0.000795, currency: "USD"
425
+ # input_cost: 0.000375, cache_read_input_cost: 0.0,
426
+ # cache_write_input_cost: 0.0, output_cost: 0.00042,
427
+ # total_cost: 0.000795, currency: "USD"
411
428
  # },
429
+ # pricing_mode: "batch",
412
430
  # tags: { feature: "chat", user_id: 42 },
413
431
  # tracked_at: 2026-04-16 14:30:00 UTC
414
432
  # }
@@ -440,21 +458,23 @@ Configured hosts are parsed using the OpenAI-compatible usage shape (`prompt_tok
440
458
  For providers with a non-OpenAI usage shape:
441
459
 
442
460
  ```ruby
443
- require "uri"
444
-
445
461
  class AcmeParser < LlmCostTracker::Parsers::Base
462
+ HOSTS = %w[api.acme-llm.example].freeze
463
+ TRACKED_PATHS = %w[/v1/generate].freeze
464
+
465
+ def provider_names
466
+ %w[acme]
467
+ end
468
+
446
469
  def match?(url)
447
- uri = URI.parse(url.to_s)
448
- uri.host == "api.acme-llm.example" && uri.path == "/v1/generate"
449
- rescue URI::InvalidURIError
450
- false
470
+ match_uri?(url, hosts: HOSTS, exact_paths: TRACKED_PATHS)
451
471
  end
452
472
 
453
- def parse(request_url, request_body, response_status, response_body)
473
+ def parse(_request_url, _request_body, response_status, response_body)
454
474
  return nil unless response_status == 200
455
475
 
456
476
  payload = safe_json_parse(response_body)
457
- usage = payload&.dig("usage")
477
+ usage = payload.dig("usage")
458
478
  return nil unless usage
459
479
 
460
480
  LlmCostTracker::ParsedUsage.build(
@@ -466,7 +486,7 @@ class AcmeParser < LlmCostTracker::Parsers::Base
466
486
  end
467
487
  end
468
488
 
469
- LlmCostTracker::Parsers::Registry.register(AcmeParser.new)
489
+ LlmCostTracker::Parsers::Registry.register(AcmeParser)
470
490
  ```
471
491
 
472
492
  ## Supported providers
@@ -511,11 +531,12 @@ The gem is designed for multi-threaded hosts — Puma with `max_threads > 1` and
511
531
  - `:block_requests` is a best-effort guardrail, not a hard cap. Concurrent workers can pass preflight simultaneously and collectively overshoot the budget. Use an external quota system if you need a transactional cap.
512
532
  - Streaming capture relies on the provider emitting a final-usage event (OpenAI needs `stream_options: { include_usage: true }`); missing events are recorded with `usage_source: "unknown"` so they surface on the Data Quality page.
513
533
  - `provider_response_id` is stored only when the provider exposes a stable response object ID. Missing IDs stay `nil` and surface on the Data Quality page.
514
- - Anthropic cache TTL variants (1h vs 5min writes) not modeled separately.
515
- - OpenAI reasoning tokens included in output totals; separate reasoning-token attribution not stored.
534
+ - Cache write TTL variants (1h vs 5min writes) are not modeled separately.
516
535
 
517
536
  ## Development
518
537
 
538
+ Architecture rules for future changes live in [`docs/architecture.md`](docs/architecture.md). Integration recipes live in [`docs/cookbook.md`](docs/cookbook.md).
539
+
519
540
  ```bash
520
541
  bundle install
521
542
  bundle exec rspec
@@ -13,40 +13,113 @@ module LlmCostTracker
13
13
  :stream_column_present,
14
14
  :missing_provider_response_id_count,
15
15
  :provider_response_id_column_present,
16
+ :usage_breakdown_column_present,
17
+ :input_tokens,
18
+ :cache_read_input_tokens,
19
+ :cache_write_input_tokens,
20
+ :output_tokens,
21
+ :hidden_output_tokens,
22
+ :input_cost,
23
+ :cache_read_input_cost,
24
+ :cache_write_input_cost,
25
+ :output_cost,
16
26
  :unknown_pricing_by_model
17
27
  )
18
28
 
19
29
  class DataQuality
20
30
  class << self
21
31
  def call(scope: LlmCostTracker::LlmApiCall.all)
22
- total = scope.count
23
- latency_present = LlmCostTracker::LlmApiCall.latency_column?
24
- stream_present = LlmCostTracker::LlmApiCall.stream_column?
25
- provider_response_id_present = LlmCostTracker::LlmApiCall.provider_response_id_column?
32
+ model = scope.klass
33
+ aggregates = DataQualityAggregate.call(scope: scope)
34
+ total = aggregates.fetch(:total_calls).to_i
26
35
 
27
36
  DataQualityStats.new(
28
37
  total_calls: total,
29
- unknown_pricing_count: scope.unknown_pricing.count,
30
- untagged_calls_count: total - scope.with_json_tags.count,
31
- missing_latency_count: latency_present ? scope.where(latency_ms: nil).count : nil,
32
- latency_column_present: latency_present,
33
- streaming_count: stream_present ? scope.streaming.count : nil,
34
- streaming_missing_usage_count: if stream_present && LlmCostTracker::LlmApiCall.usage_source_column?
35
- scope.streaming_missing_usage.count
36
- end,
37
- stream_column_present: stream_present,
38
- missing_provider_response_id_count: (
39
- provider_response_id_present ? scope.missing_provider_response_id.count : nil
40
- ),
41
- provider_response_id_column_present: provider_response_id_present,
42
- unknown_pricing_by_model: scope.unknown_pricing
43
- .group(:model)
44
- .order(Arel.sql("COUNT(*) DESC"))
45
- .count
46
- .first(10)
47
- .to_h
38
+ unknown_pricing_count: aggregates.fetch(:unknown_pricing_count).to_i,
39
+ untagged_calls_count: total - aggregates.fetch(:tagged_calls_count).to_i,
40
+ **latency_stats(aggregates, model:),
41
+ **stream_stats(aggregates, model:),
42
+ **provider_response_id_stats(aggregates, model:),
43
+ **usage_stats(aggregates, model:),
44
+ unknown_pricing_by_model: unknown_pricing_by_model(scope)
48
45
  )
49
46
  end
47
+
48
+ private
49
+
50
+ def latency_stats(aggregates, model:)
51
+ latency_present = model.latency_column?
52
+
53
+ {
54
+ missing_latency_count: latency_present ? aggregates.fetch(:missing_latency_count).to_i : nil,
55
+ latency_column_present: latency_present
56
+ }
57
+ end
58
+
59
+ def stream_stats(aggregates, model:)
60
+ stream_present = model.stream_column?
61
+ usage_source_present = model.usage_source_column?
62
+ streaming_missing_usage_count = nil
63
+ if stream_present && usage_source_present
64
+ streaming_missing_usage_count = aggregates.fetch(:streaming_missing_usage_count).to_i
65
+ end
66
+
67
+ {
68
+ streaming_count: stream_present ? aggregates.fetch(:streaming_count).to_i : nil,
69
+ streaming_missing_usage_count: streaming_missing_usage_count,
70
+ stream_column_present: stream_present
71
+ }
72
+ end
73
+
74
+ def provider_response_id_stats(aggregates, model:)
75
+ column_present = model.provider_response_id_column?
76
+ missing_provider_response_id_count = nil
77
+ if column_present
78
+ missing_provider_response_id_count = aggregates.fetch(:missing_provider_response_id_count).to_i
79
+ end
80
+
81
+ {
82
+ missing_provider_response_id_count: missing_provider_response_id_count,
83
+ provider_response_id_column_present: column_present
84
+ }
85
+ end
86
+
87
+ def usage_stats(aggregates, model:)
88
+ usage_breakdown_present = model.usage_breakdown_columns?
89
+ usage_breakdown_cost_present = model.usage_breakdown_cost_columns?
90
+ cache_read_input_cost = nil
91
+ cache_write_input_cost = nil
92
+ if usage_breakdown_cost_present
93
+ cache_read_input_cost = decimal_sum(aggregates.fetch(:cache_read_input_cost))
94
+ cache_write_input_cost = decimal_sum(aggregates.fetch(:cache_write_input_cost))
95
+ end
96
+
97
+ {
98
+ usage_breakdown_column_present: usage_breakdown_present,
99
+ input_tokens: aggregates.fetch(:input_tokens).to_i,
100
+ cache_read_input_tokens: usage_breakdown_present ? aggregates.fetch(:cache_read_input_tokens).to_i : nil,
101
+ cache_write_input_tokens: usage_breakdown_present ? aggregates.fetch(:cache_write_input_tokens).to_i : nil,
102
+ output_tokens: aggregates.fetch(:output_tokens).to_i,
103
+ hidden_output_tokens: usage_breakdown_present ? aggregates.fetch(:hidden_output_tokens).to_i : nil,
104
+ input_cost: decimal_sum(aggregates.fetch(:input_cost)),
105
+ cache_read_input_cost: cache_read_input_cost,
106
+ cache_write_input_cost: cache_write_input_cost,
107
+ output_cost: decimal_sum(aggregates.fetch(:output_cost))
108
+ }
109
+ end
110
+
111
+ def unknown_pricing_by_model(scope)
112
+ scope.unknown_pricing
113
+ .group(:model)
114
+ .order(Arel.sql("COUNT(*) DESC"))
115
+ .count
116
+ .first(10)
117
+ .to_h
118
+ end
119
+
120
+ def decimal_sum(value)
121
+ value.to_f.round(8)
122
+ end
50
123
  end
51
124
  end
52
125
  end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LlmCostTracker
4
+ module Dashboard
5
+ class DataQualityAggregate
6
+ class << self
7
+ def call(scope:)
8
+ model = scope.klass
9
+ expressions = aggregate_expressions(scope, model:)
10
+ values = Array(scope.unscope(:order).pick(*expressions.values))
11
+
12
+ expressions.keys.zip(values).to_h
13
+ end
14
+
15
+ private
16
+
17
+ def aggregate_expressions(scope, model:)
18
+ usage_breakdown_present = model.usage_breakdown_columns?
19
+ usage_breakdown_cost_present = model.usage_breakdown_cost_columns?
20
+
21
+ expressions = {
22
+ total_calls: Arel.sql("COUNT(*)"),
23
+ unknown_pricing_count: conditional_count_expression("total_cost IS NULL"),
24
+ tagged_calls_count: tagged_calls_expression(model)
25
+ }
26
+
27
+ if model.latency_column?
28
+ expressions[:missing_latency_count] = conditional_count_expression("latency_ms IS NULL")
29
+ end
30
+ expressions[:streaming_count] = conditional_count_expression("stream") if model.stream_column?
31
+ if model.stream_column? && model.usage_source_column?
32
+ expressions[:streaming_missing_usage_count] =
33
+ conditional_count_expression("stream AND (usage_source = 'unknown' OR usage_source IS NULL)")
34
+ end
35
+ if model.provider_response_id_column?
36
+ expressions[:missing_provider_response_id_count] =
37
+ conditional_count_expression("provider_response_id IS NULL OR provider_response_id = ''")
38
+ end
39
+
40
+ usage_sum_columns(usage_breakdown_present, usage_breakdown_cost_present).each do |column|
41
+ expressions[column] = sum_expression(scope, column)
42
+ end
43
+
44
+ expressions
45
+ end
46
+
47
+ def usage_sum_columns(usage_breakdown_present, usage_breakdown_cost_present)
48
+ columns = %i[input_tokens output_tokens input_cost output_cost]
49
+ if usage_breakdown_present
50
+ columns += %i[cache_read_input_tokens cache_write_input_tokens hidden_output_tokens]
51
+ end
52
+ columns += %i[cache_read_input_cost cache_write_input_cost] if usage_breakdown_cost_present
53
+ columns
54
+ end
55
+
56
+ def conditional_count_expression(predicate)
57
+ Arel.sql("COALESCE(SUM(CASE WHEN #{predicate} THEN 1 ELSE 0 END), 0)")
58
+ end
59
+
60
+ def tagged_calls_expression(model)
61
+ table = model.quoted_table_name
62
+ column = "#{table}.#{model.connection.quote_column_name('tags')}"
63
+
64
+ Arel.sql(case
65
+ when model.tags_jsonb_column?
66
+ "COALESCE(SUM(CASE WHEN #{column} <> '{}'::jsonb THEN 1 ELSE 0 END), 0)"
67
+ when model.tags_mysql_json_column?
68
+ "COALESCE(SUM(CASE WHEN JSON_LENGTH(#{column}) > 0 THEN 1 ELSE 0 END), 0)"
69
+ else
70
+ "COALESCE(SUM(CASE WHEN #{column} IS NOT NULL AND #{column} <> '' " \
71
+ "AND #{column} <> '{}' THEN 1 ELSE 0 END), 0)"
72
+ end)
73
+ end
74
+
75
+ def sum_expression(scope, column)
76
+ Arel.sql("COALESCE(SUM(#{scope.connection.quote_column_name(column)}), 0)")
77
+ end
78
+ end
79
+ end
80
+ end
81
+ end
@@ -2,6 +2,8 @@
2
2
  <% streaming_count = @stats.streaming_count %>
3
3
  <% streaming_missing_usage = @stats.streaming_missing_usage_count %>
4
4
  <% calls_with_provider_response_id = @stats.provider_response_id_column_present ? total - @stats.missing_provider_response_id_count : nil %>
5
+ <% billable_tokens = @stats.input_tokens + @stats.output_tokens + @stats.cache_read_input_tokens.to_i + @stats.cache_write_input_tokens.to_i %>
6
+ <% hidden_output_share = coverage_percent(@stats.hidden_output_tokens.to_i, @stats.output_tokens) %>
5
7
 
6
8
  <section class="lct-panel lct-toolbar">
7
9
  <div class="lct-toolbar-head">
@@ -118,6 +120,14 @@
118
120
  <p class="lct-stat-sub"><%= percent(coverage_percent(calls_with_provider_response_id, total)) %> of calls</p>
119
121
  </article>
120
122
  <% end %>
123
+
124
+ <% if @stats.usage_breakdown_column_present && @stats.output_tokens.positive? %>
125
+ <article class="lct-stat">
126
+ <p class="lct-stat-label">Hidden output share</p>
127
+ <p class="lct-stat-value"><%= percent(hidden_output_share) %></p>
128
+ <p class="lct-stat-sub"><%= number(@stats.hidden_output_tokens) %> of <%= number(@stats.output_tokens) %> output tokens</p>
129
+ </article>
130
+ <% end %>
121
131
  </div>
122
132
  </div>
123
133
  </section>
@@ -243,6 +253,61 @@
243
253
  </section>
244
254
  </section>
245
255
 
256
+ <% if @stats.usage_breakdown_column_present %>
257
+ <section class="lct-panel">
258
+ <div class="lct-section-head">
259
+ <div>
260
+ <h2 class="lct-section-title">Usage breakdown</h2>
261
+ </div>
262
+ </div>
263
+
264
+ <div class="lct-table-wrap">
265
+ <table class="lct-table lct-table-compact">
266
+ <thead>
267
+ <tr>
268
+ <th>Bucket</th>
269
+ <th class="lct-num">Tokens</th>
270
+ <th class="lct-num">Share</th>
271
+ <th class="lct-num">Cost</th>
272
+ </tr>
273
+ </thead>
274
+ <tbody>
275
+ <tr>
276
+ <td>Regular input</td>
277
+ <td class="lct-num"><%= number(@stats.input_tokens) %></td>
278
+ <td class="lct-num"><%= percent(coverage_percent(@stats.input_tokens, billable_tokens)) %></td>
279
+ <td class="lct-num"><%= money(@stats.input_cost) %></td>
280
+ </tr>
281
+ <tr>
282
+ <td>Cache read input</td>
283
+ <td class="lct-num"><%= number(@stats.cache_read_input_tokens) %></td>
284
+ <td class="lct-num"><%= percent(coverage_percent(@stats.cache_read_input_tokens, billable_tokens)) %></td>
285
+ <td class="lct-num<%= ' lct-num-muted' if @stats.cache_read_input_cost.nil? %>"><%= optional_money(@stats.cache_read_input_cost) %></td>
286
+ </tr>
287
+ <tr>
288
+ <td>Cache write input</td>
289
+ <td class="lct-num"><%= number(@stats.cache_write_input_tokens) %></td>
290
+ <td class="lct-num"><%= percent(coverage_percent(@stats.cache_write_input_tokens, billable_tokens)) %></td>
291
+ <td class="lct-num<%= ' lct-num-muted' if @stats.cache_write_input_cost.nil? %>"><%= optional_money(@stats.cache_write_input_cost) %></td>
292
+ </tr>
293
+ <tr>
294
+ <td>Output</td>
295
+ <td class="lct-num"><%= number(@stats.output_tokens) %></td>
296
+ <td class="lct-num"><%= percent(coverage_percent(@stats.output_tokens, billable_tokens)) %></td>
297
+ <td class="lct-num"><%= money(@stats.output_cost) %></td>
298
+ </tr>
299
+ <tr>
300
+ <td>Hidden output</td>
301
+ <td class="lct-num"><%= number(@stats.hidden_output_tokens) %></td>
302
+ <td class="lct-num"><%= percent(hidden_output_share) %> of output</td>
303
+ <td class="lct-num lct-num-muted">n/a</td>
304
+ </tr>
305
+ </tbody>
306
+ </table>
307
+ </div>
308
+ </section>
309
+ <% end %>
310
+
246
311
  <% unless @stats.unknown_pricing_by_model.empty? %>
247
312
  <section class="lct-panel">
248
313
  <div class="lct-section-head">