llm_cost_tracker 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/README.md +34 -14
  4. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +101 -19
  5. data/app/views/llm_cost_tracker/data_quality/index.html.erb +65 -0
  6. data/lib/llm_cost_tracker/budget.rb +85 -21
  7. data/lib/llm_cost_tracker/configuration.rb +4 -0
  8. data/lib/llm_cost_tracker/cost.rb +1 -2
  9. data/lib/llm_cost_tracker/errors.rb +22 -3
  10. data/lib/llm_cost_tracker/event.rb +4 -0
  11. data/lib/llm_cost_tracker/event_metadata.rb +21 -15
  12. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_period_totals_generator.rb +29 -0
  13. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_usage_breakdown_generator.rb +29 -0
  14. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +66 -0
  15. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_usage_breakdown_to_llm_api_calls.rb.erb +29 -0
  16. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +15 -0
  17. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +3 -1
  18. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +11 -3
  19. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +1 -0
  20. data/lib/llm_cost_tracker/middleware/faraday.rb +27 -9
  21. data/lib/llm_cost_tracker/parsed_usage.rb +16 -7
  22. data/lib/llm_cost_tracker/parsers/anthropic.rb +7 -6
  23. data/lib/llm_cost_tracker/parsers/base.rb +2 -1
  24. data/lib/llm_cost_tracker/parsers/gemini.rb +5 -2
  25. data/lib/llm_cost_tracker/parsers/openai_usage.rb +18 -5
  26. data/lib/llm_cost_tracker/period_total.rb +9 -0
  27. data/lib/llm_cost_tracker/price_registry.rb +14 -4
  28. data/lib/llm_cost_tracker/price_sync/merger.rb +1 -1
  29. data/lib/llm_cost_tracker/price_sync/raw_price.rb +3 -5
  30. data/lib/llm_cost_tracker/price_sync/sources/litellm.rb +2 -3
  31. data/lib/llm_cost_tracker/price_sync/sources/open_router.rb +2 -3
  32. data/lib/llm_cost_tracker/prices.json +30 -30
  33. data/lib/llm_cost_tracker/pricing.rb +44 -32
  34. data/lib/llm_cost_tracker/railtie.rb +2 -0
  35. data/lib/llm_cost_tracker/storage/active_record_rollups.rb +122 -0
  36. data/lib/llm_cost_tracker/storage/active_record_store.rb +38 -13
  37. data/lib/llm_cost_tracker/stream_collector.rb +5 -3
  38. data/lib/llm_cost_tracker/tags_column.rb +19 -0
  39. data/lib/llm_cost_tracker/tracker.rb +58 -32
  40. data/lib/llm_cost_tracker/unknown_pricing.rb +14 -0
  41. data/lib/llm_cost_tracker/usage_breakdown.rb +30 -0
  42. data/lib/llm_cost_tracker/version.rb +1 -1
  43. data/lib/llm_cost_tracker.rb +12 -3
  44. metadata +10 -4
  45. data/llm_cost_tracker.gemspec +0 -50
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6952282e6f93b4e5658ef9d2b9527d2a332cb2d6f483da25540c3a0d6672ed9b
4
- data.tar.gz: e66eaaeb99698abf9c0ff9e3f1305e6bb27a8b6c25355e94bce4baec5f5d3a50
3
+ metadata.gz: ccb9a8365f4a06026a4352385efa1318ac59ce403cb848e0c9aff992fc80f64c
4
+ data.tar.gz: f21503cd322e923dc5bde0139cc61bc1547cef01eac59fe7a3861e1ab33e9860
5
5
  SHA512:
6
- metadata.gz: '078d695498ed6f254a700ccb3381ace3feaa1f4880691a1a69be4bb435097202ecf28f2cbf065202a543186bdd3894aaedcc44bfacc2d2ffa25a54ddf6d1cc76'
7
- data.tar.gz: 2638ae3c579bd2c0f71a73b06719eb0a35d7b82e8614a74c5100f41b69693babfd3b613ecf18e7f6e7ea33210222432efa5a7ca718325c4c8fd12be9b8ab806e
6
+ metadata.gz: 304ab6de6404f070b21b1dd72ce9eae2b44fb2fc7845eae8831a04971ed2b8ec2b6f740bc082fb36cfa42d90f0be59ab5800d43d72b68f04918a113b6d7d8cbd
7
+ data.tar.gz: afa2e92a99062bb1e0b4a00ab1d0762ca688f1890e0d76a29801881e2319e68db217c036d8f8c5d99558b580d5d4c039f8b3334283631763ee093fd12d369329
data/CHANGELOG.md CHANGED
@@ -4,6 +4,41 @@ Format: [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). Versioning: [S
4
4
 
5
5
  ## [Unreleased]
6
6
 
7
+ ## [0.4.0] - 2026-04-24
8
+
9
+ ### Changed
10
+
11
+ - BREAKING: Canonical usage and pricing now use `cache_read_input` / `cache_write_input` instead of `cached_input` / `cache_creation_input`.
12
+ - BREAKING: `Pricing.cost_for` now requires `provider:` and prefers provider-specific price entries before model-only entries.
13
+ - BREAKING: Fresh ActiveRecord installs include cache-read, cache-write, and hidden-output token/cost breakdown columns.
14
+ - BREAKING: ActiveRecord budget rollups now use `llm_cost_tracker_period_totals`.
15
+ - BREAKING: `llm_cost_tracker:add_monthly_totals` was replaced by `llm_cost_tracker:add_period_totals`.
16
+ - `llm_cost_tracker:add_usage_breakdown` generator for upgrading existing ActiveRecord installs.
17
+ - `llm_cost_tracker:add_period_totals` generator for upgrading existing ActiveRecord installs.
18
+ - Generic `pricing_mode` support with mode-prefixed local price keys.
19
+ - Data Quality now shows usage bucket totals and hidden-output share.
20
+ - Daily budget and per-call budget guardrails.
21
+
22
+ ## [0.3.3] - 2026-04-24
23
+
24
+ ### Added
25
+
26
+ - Monthly rollup totals for ActiveRecord budget checks, plus `llm_cost_tracker:add_monthly_totals` for upgrading existing installs.
27
+
28
+ ### Changed
29
+
30
+ - ActiveRecord monthly totals now update through a single atomic upsert.
31
+ - Faraday stream capture overflow now records `usage_source: "unknown"` instead of dropping the tracked event.
32
+ - Budget `:notify` callbacks now fire only on the first event that crosses the monthly limit.
33
+
34
+ ### Fixed
35
+
36
+ - Treat `config.enabled = false` as a global kill switch for direct `track` and `track_stream` calls too.
37
+ - Deduplicate unknown-pricing warnings per model.
38
+ - Detect streaming requests from parsed JSON instead of raw body substring matching.
39
+ - Cap automatic SSE capture to avoid unbounded memory growth on large streaming responses.
40
+ - Warn that the generated PostgreSQL `tags -> jsonb` upgrade migration rewrites large tables and should run in a maintenance window.
41
+
7
42
  ## [0.3.2] - 2026-04-22
8
43
 
9
44
  ### Added
data/README.md CHANGED
@@ -15,7 +15,7 @@ Every Rails app with LLM integrations eventually runs into the same question: wh
15
15
 
16
16
  ## What You Get
17
17
 
18
- - A local ActiveRecord ledger of provider, model, tokens, cost, latency, tags, streaming usage, and provider response IDs
18
+ - A local ActiveRecord ledger of provider, model, usage breakdown, cost, latency, tags, streaming usage, and provider response IDs
19
19
  - Faraday middleware plus explicit `track` / `track_stream` helpers for non-Faraday clients
20
20
  - Server-rendered Rails dashboard with overview, calls, tags, CSV export, and data-quality pages
21
21
  - Local pricing snapshots, price sync tasks, and budget guardrails
@@ -159,7 +159,7 @@ LlmCostTracker.track_stream(provider: "anthropic", model: "claude-sonnet-4-6") d
159
159
  end
160
160
  ```
161
161
 
162
- Run `bin/rails g llm_cost_tracker:add_streaming` once on existing installs to add the `stream` and `usage_source` columns. Run `bin/rails g llm_cost_tracker:add_provider_response_id` to persist provider-issued response IDs.
162
+ Run `bin/rails g llm_cost_tracker:add_streaming` once on existing installs to add the `stream` and `usage_source` columns. Run `bin/rails g llm_cost_tracker:add_provider_response_id` to persist provider-issued response IDs. Run `bin/rails g llm_cost_tracker:add_usage_breakdown` to add cache-read, cache-write, hidden-output, and pricing-mode columns.
163
163
 
164
164
  ### Manual tracking
165
165
 
@@ -176,6 +176,10 @@ LlmCostTracker.track(
176
176
  )
177
177
  ```
178
178
 
179
+ `input_tokens` is regular non-cache input. Put cache hits in
180
+ `cache_read_input_tokens` and cache writes in `cache_write_input_tokens`; total
181
+ tokens are calculated from the canonical billing breakdown.
182
+
179
183
  ## Configuration
180
184
 
181
185
  ```ruby
@@ -185,17 +189,19 @@ LlmCostTracker.configure do |config|
185
189
  config.default_tags = { app: "my_app", environment: Rails.env }
186
190
 
187
191
  config.monthly_budget = 500.00
192
+ config.daily_budget = 50.00
193
+ config.per_call_budget = 2.00
188
194
  config.budget_exceeded_behavior = :notify # :notify, :raise, :block_requests
189
195
  config.storage_error_behavior = :warn # :ignore, :warn, :raise
190
196
  config.unknown_pricing_behavior = :warn # :ignore, :warn, :raise
191
197
 
192
198
  config.on_budget_exceeded = ->(data) {
193
- SlackNotifier.notify("#alerts", "🚨 LLM budget $#{data[:monthly_total].round(2)} / $#{data[:budget]}")
199
+ SlackNotifier.notify("#alerts", "🚨 LLM #{data[:budget_type]} budget $#{data[:total].round(2)} / $#{data[:budget]}")
194
200
  }
195
201
 
196
202
  config.prices_file = Rails.root.join("config/llm_cost_tracker_prices.yml")
197
203
  config.pricing_overrides = {
198
- "ft:gpt-4o-mini:my-org" => { input: 0.30, cached_input: 0.15, output: 1.20 }
204
+ "ft:gpt-4o-mini:my-org" => { input: 0.30, cache_read_input: 0.15, output: 1.20 }
199
205
  }
200
206
 
201
207
  # Built-in: openrouter.ai, api.deepseek.com
@@ -203,7 +209,9 @@ LlmCostTracker.configure do |config|
203
209
  end
204
210
  ```
205
211
 
206
- Pricing is best effort. OpenRouter-style IDs like `openai/gpt-4o-mini` are normalized to built-in names when possible. Use `prices_file` / `pricing_overrides` for fine-tunes, gateway-specific IDs, enterprise discounts, batch pricing, or models the gem does not know.
212
+ Pricing is best effort. OpenRouter-style IDs like `openai/gpt-4o-mini` are normalized to built-in names when possible. Use `prices_file` / `pricing_overrides` for fine-tunes, gateway-specific IDs, enterprise discounts, alternate pricing modes, or models the gem does not know.
213
+ Provider-specific entries like `openai/gpt-4o-mini` win over model-only entries like `gpt-4o-mini`.
214
+ Pass `pricing_mode: :batch` to use optional mode-specific keys such as `batch_input` / `batch_output`; missing mode-specific keys fall back to standard `input` / `output` rates. The same pattern works for custom modes, for example `contract_input`.
207
215
 
208
216
  `storage_error_behavior = :warn` (default) lets LLM responses continue if storage fails; `:raise` exposes `StorageError#original_error`.
209
217
 
@@ -225,7 +233,7 @@ bin/rails generate llm_cost_tracker:prices
225
233
  {
226
234
  "metadata": { "updated_at": "2026-04-18", "currency": "USD", "unit": "1M tokens" },
227
235
  "models": {
228
- "my-gateway/gpt-4o-mini": { "input": 0.20, "cached_input": 0.10, "output": 0.80 }
236
+ "my-gateway/gpt-4o-mini": { "input": 0.20, "cache_read_input": 0.10, "output": 0.80, "batch_input": 0.10, "batch_output": 0.40 }
229
237
  }
230
238
  }
231
239
  ```
@@ -256,16 +264,22 @@ Large price changes are flagged during sync. If a specific entry is expected to
256
264
  ```ruby
257
265
  config.storage_backend = :active_record
258
266
  config.monthly_budget = 100.00
267
+ config.daily_budget = 10.00
268
+ config.per_call_budget = 1.00
259
269
  config.budget_exceeded_behavior = :block_requests
260
270
  ```
261
271
 
262
272
  - `:notify` — fire `on_budget_exceeded` after an event pushes the month over budget.
263
273
  - `:raise` — record the event, then raise `BudgetExceededError`.
264
- - `:block_requests` — block preflight when the stored monthly total is already over budget; still raises post-response on the event that crosses the line. Needs `:active_record` storage.
274
+ - `:block_requests` — block preflight when the stored monthly or daily total is already over budget; still raises post-response on the event that crosses the line. Needs `:active_record` storage for preflight.
275
+
276
+ `monthly_budget` and `daily_budget` are cumulative ledger limits. `per_call_budget` is a ceiling for a single priced event and runs after the response cost is known.
277
+
278
+ ActiveRecord installs keep `llm_cost_tracker_period_totals` in sync with atomic upserts. Budget preflight reads period rollups instead of scanning `llm_api_calls`.
265
279
 
266
280
  ```ruby
267
281
  rescue LlmCostTracker::BudgetExceededError => e
268
- # e.monthly_total, e.budget, e.last_event
282
+ # e.budget_type, e.total, e.budget, e.monthly_total, e.daily_total, e.call_cost, e.last_event
269
283
  ```
270
284
 
271
285
  `:block_requests` is a **guardrail, not a hard cap**. The preflight and the spend-recording write are separate statements, so under Puma / Sidekiq concurrency multiple workers can all pass the preflight and then collectively overshoot the budget. The setting reliably *stops new requests after the overshoot is visible* — it does not prevent the overshoot itself. For strict quotas use a provider- or gateway-level limit, or a database-backed counter outside this gem.
@@ -343,12 +357,15 @@ On other adapters tags fall back to JSON in a text column. `by_tag` uses JSONB c
343
357
  Upgrade an existing install:
344
358
 
345
359
  ```bash
360
+ bin/rails generate llm_cost_tracker:add_period_totals # shared budget rollups
346
361
  bin/rails generate llm_cost_tracker:upgrade_tags_to_jsonb # PG: text → jsonb + GIN
347
362
  bin/rails generate llm_cost_tracker:upgrade_cost_precision # widen cost columns
348
363
  bin/rails generate llm_cost_tracker:add_latency_ms
349
364
  bin/rails db:migrate
350
365
  ```
351
366
 
367
+ On PostgreSQL, the generated `upgrade_tags_to_jsonb` migration rewrites `llm_api_calls`. Run it during a maintenance window on large tables, or replace it with a two-phase backfill for zero-downtime deploys.
368
+
352
369
  ## Mounting the dashboard
353
370
 
354
371
  Optional Rails Engine. Plain ERB, no JavaScript framework, no asset pipeline required. Requires Rails 7.1+; the core middleware works without Rails.
@@ -400,12 +417,14 @@ ActiveSupport::Notifications.subscribe("llm_request.llm_cost_tracker") do |*, pa
400
417
  # payload =>
401
418
  # {
402
419
  # provider: "openai", model: "gpt-4o",
403
- # input_tokens: 150, output_tokens: 42, total_tokens: 192, latency_ms: 248,
420
+ # input_tokens: 150, cache_read_input_tokens: 0, cache_write_input_tokens: 0,
421
+ # hidden_output_tokens: 0, output_tokens: 42, total_tokens: 192, latency_ms: 248,
404
422
  # cost: {
405
- # input_cost: 0.000375, cached_input_cost: 0.0,
406
- # cache_read_input_cost: 0.0, cache_creation_input_cost: 0.0,
407
- # output_cost: 0.00042, total_cost: 0.000795, currency: "USD"
423
+ # input_cost: 0.000375, cache_read_input_cost: 0.0,
424
+ # cache_write_input_cost: 0.0, output_cost: 0.00042,
425
+ # total_cost: 0.000795, currency: "USD"
408
426
  # },
427
+ # pricing_mode: "batch",
409
428
  # tags: { feature: "chat", user_id: 42 },
410
429
  # tracked_at: 2026-04-16 14:30:00 UTC
411
430
  # }
@@ -508,11 +527,12 @@ The gem is designed for multi-threaded hosts — Puma with `max_threads > 1` and
508
527
  - `:block_requests` is a best-effort guardrail, not a hard cap. Concurrent workers can pass preflight simultaneously and collectively overshoot the budget. Use an external quota system if you need a transactional cap.
509
528
  - Streaming capture relies on the provider emitting a final-usage event (OpenAI needs `stream_options: { include_usage: true }`); missing events are recorded with `usage_source: "unknown"` so they surface on the Data Quality page.
510
529
  - `provider_response_id` is stored only when the provider exposes a stable response object ID. Missing IDs stay `nil` and surface on the Data Quality page.
511
- - Anthropic cache TTL variants (1h vs 5min writes) not modeled separately.
512
- - OpenAI reasoning tokens included in output totals; separate reasoning-token attribution not stored.
530
+ - Cache write TTL variants (1h vs 5min writes) not modeled separately.
513
531
 
514
532
  ## Development
515
533
 
534
+ Architecture rules for future changes live in [`docs/architecture.md`](docs/architecture.md).
535
+
516
536
  ```bash
517
537
  bundle install
518
538
  bundle exec rspec
data/app/services/llm_cost_tracker/dashboard/data_quality.rb CHANGED
@@ -13,6 +13,16 @@ module LlmCostTracker
13
13
  :stream_column_present,
14
14
  :missing_provider_response_id_count,
15
15
  :provider_response_id_column_present,
16
+ :usage_breakdown_column_present,
17
+ :input_tokens,
18
+ :cache_read_input_tokens,
19
+ :cache_write_input_tokens,
20
+ :output_tokens,
21
+ :hidden_output_tokens,
22
+ :input_cost,
23
+ :cache_read_input_cost,
24
+ :cache_write_input_cost,
25
+ :output_cost,
16
26
  :unknown_pricing_by_model
17
27
  )
18
28
 
@@ -20,32 +30,104 @@ module LlmCostTracker
20
30
  class << self
21
31
  def call(scope: LlmCostTracker::LlmApiCall.all)
22
32
  total = scope.count
23
- latency_present = LlmCostTracker::LlmApiCall.latency_column?
24
- stream_present = LlmCostTracker::LlmApiCall.stream_column?
25
- provider_response_id_present = LlmCostTracker::LlmApiCall.provider_response_id_column?
26
33
 
27
34
  DataQualityStats.new(
28
35
  total_calls: total,
29
36
  unknown_pricing_count: scope.unknown_pricing.count,
30
37
  untagged_calls_count: total - scope.with_json_tags.count,
38
+ **latency_stats(scope),
39
+ **stream_stats(scope),
40
+ **provider_response_id_stats(scope),
41
+ **usage_stats(scope),
42
+ unknown_pricing_by_model: unknown_pricing_by_model(scope)
43
+ )
44
+ end
45
+
46
+ private
47
+
48
+ def latency_stats(scope)
49
+ latency_present = LlmCostTracker::LlmApiCall.latency_column?
50
+
51
+ {
31
52
  missing_latency_count: latency_present ? scope.where(latency_ms: nil).count : nil,
32
- latency_column_present: latency_present,
53
+ latency_column_present: latency_present
54
+ }
55
+ end
56
+
57
+ def stream_stats(scope)
58
+ stream_present = LlmCostTracker::LlmApiCall.stream_column?
59
+
60
+ {
33
61
  streaming_count: stream_present ? scope.streaming.count : nil,
34
- streaming_missing_usage_count: if stream_present && LlmCostTracker::LlmApiCall.usage_source_column?
35
- scope.streaming_missing_usage.count
36
- end,
37
- stream_column_present: stream_present,
38
- missing_provider_response_id_count: (
39
- provider_response_id_present ? scope.missing_provider_response_id.count : nil
40
- ),
41
- provider_response_id_column_present: provider_response_id_present,
42
- unknown_pricing_by_model: scope.unknown_pricing
43
- .group(:model)
44
- .order(Arel.sql("COUNT(*) DESC"))
45
- .count
46
- .first(10)
47
- .to_h
48
- )
62
+ streaming_missing_usage_count: streaming_missing_usage_count(scope, stream_present),
63
+ stream_column_present: stream_present
64
+ }
65
+ end
66
+
67
+ def provider_response_id_stats(scope)
68
+ column_present = LlmCostTracker::LlmApiCall.provider_response_id_column?
69
+
70
+ {
71
+ missing_provider_response_id_count: column_present ? scope.missing_provider_response_id.count : nil,
72
+ provider_response_id_column_present: column_present
73
+ }
74
+ end
75
+
76
+ def usage_stats(scope)
77
+ usage_breakdown_present = LlmCostTracker::LlmApiCall.usage_breakdown_columns?
78
+ usage_breakdown_cost_present = LlmCostTracker::LlmApiCall.usage_breakdown_cost_columns?
79
+ sums = sum_columns(scope, usage_sum_columns(usage_breakdown_present, usage_breakdown_cost_present))
80
+
81
+ {
82
+ usage_breakdown_column_present: usage_breakdown_present,
83
+ input_tokens: sums[:input_tokens].to_i,
84
+ cache_read_input_tokens: usage_breakdown_present ? sums[:cache_read_input_tokens].to_i : nil,
85
+ cache_write_input_tokens: usage_breakdown_present ? sums[:cache_write_input_tokens].to_i : nil,
86
+ output_tokens: sums[:output_tokens].to_i,
87
+ hidden_output_tokens: usage_breakdown_present ? sums[:hidden_output_tokens].to_i : nil,
88
+ input_cost: decimal_sum(sums[:input_cost]),
89
+ cache_read_input_cost: usage_breakdown_cost_present ? decimal_sum(sums[:cache_read_input_cost]) : nil,
90
+ cache_write_input_cost: usage_breakdown_cost_present ? decimal_sum(sums[:cache_write_input_cost]) : nil,
91
+ output_cost: decimal_sum(sums[:output_cost])
92
+ }
93
+ end
94
+
95
+ def usage_sum_columns(usage_breakdown_present, usage_breakdown_cost_present)
96
+ columns = %i[input_tokens output_tokens input_cost output_cost]
97
+ if usage_breakdown_present
98
+ columns += %i[cache_read_input_tokens cache_write_input_tokens hidden_output_tokens]
99
+ end
100
+ columns += %i[cache_read_input_cost cache_write_input_cost] if usage_breakdown_cost_present
101
+ columns
102
+ end
103
+
104
+ def streaming_missing_usage_count(scope, stream_present)
105
+ return unless stream_present && LlmCostTracker::LlmApiCall.usage_source_column?
106
+
107
+ scope.streaming_missing_usage.count
108
+ end
109
+
110
+ def unknown_pricing_by_model(scope)
111
+ scope.unknown_pricing
112
+ .group(:model)
113
+ .order(Arel.sql("COUNT(*) DESC"))
114
+ .count
115
+ .first(10)
116
+ .to_h
117
+ end
118
+
119
+ def sum_columns(scope, columns)
120
+ values = scope.unscope(:order).pick(*columns.map { |column| sum_expression(scope, column) })
121
+
122
+ columns.zip(values).to_h
123
+ end
124
+
125
+ def sum_expression(scope, column)
126
+ Arel.sql("COALESCE(SUM(#{scope.connection.quote_column_name(column)}), 0)")
127
+ end
128
+
129
+ def decimal_sum(value)
130
+ value.to_f.round(8)
49
131
  end
50
132
  end
51
133
  end
data/app/views/llm_cost_tracker/data_quality/index.html.erb CHANGED
@@ -2,6 +2,8 @@
2
2
  <% streaming_count = @stats.streaming_count %>
3
3
  <% streaming_missing_usage = @stats.streaming_missing_usage_count %>
4
4
  <% calls_with_provider_response_id = @stats.provider_response_id_column_present ? total - @stats.missing_provider_response_id_count : nil %>
5
+ <% billable_tokens = @stats.input_tokens + @stats.output_tokens + @stats.cache_read_input_tokens.to_i + @stats.cache_write_input_tokens.to_i %>
6
+ <% hidden_output_share = coverage_percent(@stats.hidden_output_tokens.to_i, @stats.output_tokens) %>
5
7
 
6
8
  <section class="lct-panel lct-toolbar">
7
9
  <div class="lct-toolbar-head">
@@ -118,6 +120,14 @@
118
120
  <p class="lct-stat-sub"><%= percent(coverage_percent(calls_with_provider_response_id, total)) %> of calls</p>
119
121
  </article>
120
122
  <% end %>
123
+
124
+ <% if @stats.usage_breakdown_column_present && @stats.output_tokens.positive? %>
125
+ <article class="lct-stat">
126
+ <p class="lct-stat-label">Hidden output share</p>
127
+ <p class="lct-stat-value"><%= percent(hidden_output_share) %></p>
128
+ <p class="lct-stat-sub"><%= number(@stats.hidden_output_tokens) %> of <%= number(@stats.output_tokens) %> output tokens</p>
129
+ </article>
130
+ <% end %>
121
131
  </div>
122
132
  </div>
123
133
  </section>
@@ -243,6 +253,61 @@
243
253
  </section>
244
254
  </section>
245
255
 
256
+ <% if @stats.usage_breakdown_column_present %>
257
+ <section class="lct-panel">
258
+ <div class="lct-section-head">
259
+ <div>
260
+ <h2 class="lct-section-title">Usage breakdown</h2>
261
+ </div>
262
+ </div>
263
+
264
+ <div class="lct-table-wrap">
265
+ <table class="lct-table lct-table-compact">
266
+ <thead>
267
+ <tr>
268
+ <th>Bucket</th>
269
+ <th class="lct-num">Tokens</th>
270
+ <th class="lct-num">Share</th>
271
+ <th class="lct-num">Cost</th>
272
+ </tr>
273
+ </thead>
274
+ <tbody>
275
+ <tr>
276
+ <td>Regular input</td>
277
+ <td class="lct-num"><%= number(@stats.input_tokens) %></td>
278
+ <td class="lct-num"><%= percent(coverage_percent(@stats.input_tokens, billable_tokens)) %></td>
279
+ <td class="lct-num"><%= money(@stats.input_cost) %></td>
280
+ </tr>
281
+ <tr>
282
+ <td>Cache read input</td>
283
+ <td class="lct-num"><%= number(@stats.cache_read_input_tokens) %></td>
284
+ <td class="lct-num"><%= percent(coverage_percent(@stats.cache_read_input_tokens, billable_tokens)) %></td>
285
+ <td class="lct-num<%= ' lct-num-muted' if @stats.cache_read_input_cost.nil? %>"><%= optional_money(@stats.cache_read_input_cost) %></td>
286
+ </tr>
287
+ <tr>
288
+ <td>Cache write input</td>
289
+ <td class="lct-num"><%= number(@stats.cache_write_input_tokens) %></td>
290
+ <td class="lct-num"><%= percent(coverage_percent(@stats.cache_write_input_tokens, billable_tokens)) %></td>
291
+ <td class="lct-num<%= ' lct-num-muted' if @stats.cache_write_input_cost.nil? %>"><%= optional_money(@stats.cache_write_input_cost) %></td>
292
+ </tr>
293
+ <tr>
294
+ <td>Output</td>
295
+ <td class="lct-num"><%= number(@stats.output_tokens) %></td>
296
+ <td class="lct-num"><%= percent(coverage_percent(@stats.output_tokens, billable_tokens)) %></td>
297
+ <td class="lct-num"><%= money(@stats.output_cost) %></td>
298
+ </tr>
299
+ <tr>
300
+ <td>Hidden output</td>
301
+ <td class="lct-num"><%= number(@stats.hidden_output_tokens) %></td>
302
+ <td class="lct-num"><%= percent(hidden_output_share) %> of output</td>
303
+ <td class="lct-num lct-num-muted">n/a</td>
304
+ </tr>
305
+ </tbody>
306
+ </table>
307
+ </div>
308
+ </section>
309
+ <% end %>
310
+
246
311
  <% unless @stats.unknown_pricing_by_model.empty? %>
247
312
  <section class="lct-panel">
248
313
  <div class="lct-section-head">
data/lib/llm_cost_tracker/budget.rb CHANGED
@@ -7,52 +7,116 @@ module LlmCostTracker
7
7
  class << self
8
8
  def enforce!
9
9
  config = LlmCostTracker.configuration
10
- return unless config.monthly_budget
11
10
  return unless config.budget_exceeded_behavior == :block_requests
12
11
  return unless config.active_record?
13
12
 
14
- monthly_total = active_record_monthly_total
15
- return unless monthly_total >= config.monthly_budget
16
-
17
- handle_exceeded(monthly_total: monthly_total)
13
+ enforce_period_budget(:monthly, config.monthly_budget)
14
+ enforce_period_budget(:daily, config.daily_budget)
18
15
  end
19
16
 
20
17
  def check!(event)
21
18
  config = LlmCostTracker.configuration
22
- return unless config.monthly_budget
23
19
  return unless event.cost
24
20
 
25
- monthly_total = if config.active_record?
26
- active_record_monthly_total
27
- else
28
- event.cost.total_cost
29
- end
30
- return unless monthly_total >= config.monthly_budget
31
-
32
- handle_exceeded(monthly_total: monthly_total, last_event: event)
21
+ check_per_call_budget(event, config)
22
+ check_period_budget(event, config, :daily, config.daily_budget)
23
+ check_period_budget(event, config, :monthly, config.monthly_budget)
33
24
  end
34
25
 
35
26
  private
36
27
 
37
- def active_record_monthly_total
28
+ def enforce_period_budget(period, budget)
29
+ return unless budget
30
+
31
+ total = active_record_total(period, time: Time.now.utc)
32
+ return unless total >= budget
33
+
34
+ handle_exceeded(budget_type: period, total: total, budget: budget)
35
+ end
36
+
37
+ def check_per_call_budget(event, config)
38
+ budget = config.per_call_budget
39
+ return unless budget
40
+
41
+ call_cost = event.cost.total_cost
42
+ return unless call_cost >= budget
43
+
44
+ handle_exceeded(budget_type: :per_call, total: call_cost, budget: budget, last_event: event)
45
+ end
46
+
47
+ def check_period_budget(event, config, period, budget)
48
+ return unless budget
49
+
50
+ total = if config.active_record?
51
+ active_record_total(period, time: event.tracked_at)
52
+ else
53
+ event.cost.total_cost
54
+ end
55
+ return unless total >= budget
56
+
57
+ handle_exceeded(budget_type: period, total: total, budget: budget, last_event: event)
58
+ end
59
+
60
+ def active_record_total(period, time:)
61
+ case period
62
+ when :monthly then active_record_monthly_total(time: time)
63
+ when :daily then active_record_daily_total(time: time)
64
+ end
65
+ end
66
+
67
+ def active_record_monthly_total(time: Time.now.utc)
38
68
  require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
39
69
  require_relative "storage/active_record_store" unless defined?(LlmCostTracker::Storage::ActiveRecordStore)
40
70
 
41
- LlmCostTracker::Storage::ActiveRecordStore.monthly_total
71
+ LlmCostTracker::Storage::ActiveRecordStore.monthly_total(time: time)
42
72
  rescue LoadError => e
43
73
  raise Error, "ActiveRecord storage requires the active_record gem: #{e.message}"
44
74
  end
45
75
 
46
- def handle_exceeded(monthly_total:, last_event: nil)
76
+ def active_record_daily_total(time: Time.now.utc)
77
+ require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
78
+ require_relative "storage/active_record_store" unless defined?(LlmCostTracker::Storage::ActiveRecordStore)
79
+
80
+ LlmCostTracker::Storage::ActiveRecordStore.daily_total(time: time)
81
+ rescue LoadError => e
82
+ raise Error, "ActiveRecord storage requires the active_record gem: #{e.message}"
83
+ end
84
+
85
+ def handle_exceeded(budget_type:, total:, budget:, last_event: nil)
47
86
  config = LlmCostTracker.configuration
87
+ payload = budget_payload(
88
+ budget_type: budget_type,
89
+ total: total,
90
+ budget: budget,
91
+ last_event: last_event
92
+ )
93
+
94
+ if notify_exceeded?(config, budget_type: budget_type, total: total, budget: budget, last_event: last_event)
95
+ config.on_budget_exceeded&.call(payload)
96
+ end
97
+ raise BudgetExceededError.new(**payload) if raise_on_exceeded?(config)
98
+ end
99
+
100
+ def budget_payload(budget_type:, total:, budget:, last_event:)
48
101
  payload = {
49
- monthly_total: monthly_total,
50
- budget: config.monthly_budget,
102
+ budget_type: budget_type,
103
+ total: total,
104
+ budget: budget,
51
105
  last_event: last_event
52
106
  }
107
+ payload[:monthly_total] = total if budget_type == :monthly
108
+ payload[:daily_total] = total if budget_type == :daily
109
+ payload[:call_cost] = total if budget_type == :per_call
110
+ payload
111
+ end
53
112
 
54
- config.on_budget_exceeded&.call(payload)
55
- raise BudgetExceededError.new(**payload) if raise_on_exceeded?(config)
113
+ def notify_exceeded?(config, budget_type:, total:, budget:, last_event:)
114
+ return false unless config.on_budget_exceeded
115
+ return true unless config.budget_exceeded_behavior == :notify
116
+ return true unless last_event&.cost
117
+ return true if budget_type == :per_call
118
+
119
+ total - last_event.cost.total_cost < budget
56
120
  end
57
121
 
58
122
  def raise_on_exceeded?(config)
data/lib/llm_cost_tracker/configuration.rb CHANGED
@@ -19,6 +19,8 @@ module LlmCostTracker
19
19
  custom_storage
20
20
  on_budget_exceeded
21
21
  monthly_budget
22
+ daily_budget
23
+ per_call_budget
22
24
  log_level
23
25
  prices_file
24
26
  ].freeze
@@ -48,6 +50,8 @@ module LlmCostTracker
48
50
  @default_tags = {}
49
51
  @on_budget_exceeded = nil
50
52
  @monthly_budget = nil
53
+ @daily_budget = nil
54
+ @per_call_budget = nil
51
55
  self.budget_exceeded_behavior = :notify
52
56
  self.storage_error_behavior = :warn
53
57
  self.unknown_pricing_behavior = :warn
data/lib/llm_cost_tracker/cost.rb CHANGED
@@ -3,9 +3,8 @@
3
3
  module LlmCostTracker
4
4
  Cost = Data.define(
5
5
  :input_cost,
6
- :cached_input_cost,
7
6
  :cache_read_input_cost,
8
- :cache_creation_input_cost,
7
+ :cache_write_input_cost,
9
8
  :output_cost,
10
9
  :total_cost,
11
10
  :currency
data/lib/llm_cost_tracker/errors.rb CHANGED
@@ -6,14 +6,33 @@ module LlmCostTracker
6
6
  class InvalidFilterError < Error; end
7
7
 
8
8
  class BudgetExceededError < Error
9
- attr_reader :monthly_total, :budget, :last_event
9
+ attr_reader :monthly_total, :daily_total, :call_cost, :total, :budget, :budget_type, :last_event
10
10
 
11
- def initialize(monthly_total:, budget:, last_event: nil)
11
+ def initialize(budget:, last_event: nil, budget_type: nil, total: nil, monthly_total: nil, daily_total: nil,
12
+ call_cost: nil)
12
13
  @monthly_total = monthly_total
14
+ @daily_total = daily_total
15
+ @call_cost = call_cost
16
+ @total = total || monthly_total || daily_total || call_cost
13
17
  @budget = budget
18
+ @budget_type = budget_type || inferred_budget_type
14
19
  @last_event = last_event
15
20
 
16
- super("LLM monthly budget exceeded: $#{format('%.6f', monthly_total)} / $#{format('%.6f', budget)}")
21
+ super("LLM #{budget_label} budget exceeded: $#{format('%.6f', @total)} / $#{format('%.6f', budget)}")
22
+ end
23
+
24
+ private
25
+
26
+ def inferred_budget_type
27
+ return :monthly if monthly_total
28
+ return :daily if daily_total
29
+ return :per_call if call_cost
30
+
31
+ :unknown
32
+ end
33
+
34
+ def budget_label
35
+ budget_type.to_s.tr("_", "-")
17
36
  end
18
37
  end
19
38
 
data/lib/llm_cost_tracker/event.rb CHANGED
@@ -7,6 +7,10 @@ module LlmCostTracker
7
7
  :input_tokens,
8
8
  :output_tokens,
9
9
  :total_tokens,
10
+ :cache_read_input_tokens,
11
+ :cache_write_input_tokens,
12
+ :hidden_output_tokens,
13
+ :pricing_mode,
10
14
  :cost,
11
15
  :tags,
12
16
  :latency_ms,