llm_cost_tracker 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -0
  3. data/README.md +32 -15
  4. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +101 -19
  5. data/app/views/llm_cost_tracker/data_quality/index.html.erb +65 -0
  6. data/lib/llm_cost_tracker/budget.rb +76 -22
  7. data/lib/llm_cost_tracker/configuration.rb +4 -0
  8. data/lib/llm_cost_tracker/cost.rb +1 -2
  9. data/lib/llm_cost_tracker/errors.rb +22 -3
  10. data/lib/llm_cost_tracker/event.rb +4 -0
  11. data/lib/llm_cost_tracker/event_metadata.rb +21 -15
  12. data/lib/llm_cost_tracker/generators/llm_cost_tracker/{add_monthly_totals_generator.rb → add_period_totals_generator.rb} +4 -4
  13. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_usage_breakdown_generator.rb +29 -0
  14. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +66 -0
  15. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_usage_breakdown_to_llm_api_calls.rb.erb +29 -0
  16. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +10 -3
  17. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +3 -1
  18. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +11 -3
  19. data/lib/llm_cost_tracker/parsed_usage.rb +16 -7
  20. data/lib/llm_cost_tracker/parsers/anthropic.rb +7 -6
  21. data/lib/llm_cost_tracker/parsers/gemini.rb +5 -2
  22. data/lib/llm_cost_tracker/parsers/openai_usage.rb +18 -5
  23. data/lib/llm_cost_tracker/period_total.rb +9 -0
  24. data/lib/llm_cost_tracker/price_registry.rb +14 -4
  25. data/lib/llm_cost_tracker/price_sync/merger.rb +1 -1
  26. data/lib/llm_cost_tracker/price_sync/raw_price.rb +3 -5
  27. data/lib/llm_cost_tracker/price_sync/sources/litellm.rb +2 -3
  28. data/lib/llm_cost_tracker/price_sync/sources/open_router.rb +2 -3
  29. data/lib/llm_cost_tracker/prices.json +30 -30
  30. data/lib/llm_cost_tracker/pricing.rb +44 -32
  31. data/lib/llm_cost_tracker/railtie.rb +2 -1
  32. data/lib/llm_cost_tracker/storage/active_record_rollups.rb +122 -0
  33. data/lib/llm_cost_tracker/storage/active_record_store.rb +33 -80
  34. data/lib/llm_cost_tracker/stream_collector.rb +4 -2
  35. data/lib/llm_cost_tracker/tags_column.rb +19 -0
  36. data/lib/llm_cost_tracker/tracker.rb +54 -32
  37. data/lib/llm_cost_tracker/usage_breakdown.rb +30 -0
  38. data/lib/llm_cost_tracker/version.rb +1 -1
  39. data/lib/llm_cost_tracker.rb +10 -3
  40. metadata +8 -4
  41. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_monthly_totals_to_llm_cost_tracker.rb.erb +0 -48
  42. data/lib/llm_cost_tracker/monthly_total.rb +0 -9
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b966913d302d5c5c3466615d1fa3983855c241f6cd9e3e26558c0fcc5fc4e7d5
4
- data.tar.gz: 52804e702d5f01e5a4d247e8b50e601dede2b328bd7075c68ffd5f472b3b0d58
3
+ metadata.gz: ccb9a8365f4a06026a4352385efa1318ac59ce403cb848e0c9aff992fc80f64c
4
+ data.tar.gz: f21503cd322e923dc5bde0139cc61bc1547cef01eac59fe7a3861e1ab33e9860
5
5
  SHA512:
6
- metadata.gz: 609ba1a18be86dce0b567b2ea33b3f3123da88683f0c65d9aef780f2e4854d1dde6686adfa505fc154d13da6dd6cb2b31d9f38c303de5fb22f6fda65c7f44aa7
7
- data.tar.gz: de372e0940b4cfc400dacfc6dbf9e00f256c6944209da8cceaadd20a318b8c7aa8982d5190e21d38640ad20d04cc86400a4e872ec640189796e045acf1f7dfad
6
+ metadata.gz: 304ab6de6404f070b21b1dd72ce9eae2b44fb2fc7845eae8831a04971ed2b8ec2b6f740bc082fb36cfa42d90f0be59ab5800d43d72b68f04918a113b6d7d8cbd
7
+ data.tar.gz: afa2e92a99062bb1e0b4a00ab1d0762ca688f1890e0d76a29801881e2319e68db217c036d8f8c5d99558b580d5d4c039f8b3334283631763ee093fd12d369329
data/CHANGELOG.md CHANGED
@@ -4,6 +4,21 @@ Format: [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). Versioning: [S
4
4
 
5
5
  ## [Unreleased]
6
6
 
7
+ ## [0.4.0] - 2026-04-24
8
+
9
+ ### Changed
10
+
11
+ - BREAKING: Canonical usage and pricing now use `cache_read_input` / `cache_write_input` instead of `cached_input` / `cache_creation_input`.
12
+ - BREAKING: `Pricing.cost_for` now requires `provider:` and prefers provider-specific price entries before model-only entries.
13
+ - BREAKING: Fresh ActiveRecord installs include cache-read, cache-write, and hidden-output token/cost breakdown columns.
14
+ - BREAKING: ActiveRecord budget rollups now use `llm_cost_tracker_period_totals`.
15
+ - BREAKING: `llm_cost_tracker:add_monthly_totals` was replaced by `llm_cost_tracker:add_period_totals`.
16
+ - `llm_cost_tracker:add_usage_breakdown` generator for upgrading existing ActiveRecord installs.
17
+ - `llm_cost_tracker:add_period_totals` generator for upgrading existing ActiveRecord installs.
18
+ - Generic `pricing_mode` support with mode-prefixed local price keys.
19
+ - Data Quality now shows usage bucket totals and hidden-output share.
20
+ - Daily budget and per-call budget guardrails.
21
+
7
22
  ## [0.3.3] - 2026-04-24
8
23
 
9
24
  ### Added
data/README.md CHANGED
@@ -15,7 +15,7 @@ Every Rails app with LLM integrations eventually runs into the same question: wh
15
15
 
16
16
  ## What You Get
17
17
 
18
- - A local ActiveRecord ledger of provider, model, tokens, cost, latency, tags, streaming usage, and provider response IDs
18
+ - A local ActiveRecord ledger of provider, model, usage breakdown, cost, latency, tags, streaming usage, and provider response IDs
19
19
  - Faraday middleware plus explicit `track` / `track_stream` helpers for non-Faraday clients
20
20
  - Server-rendered Rails dashboard with overview, calls, tags, CSV export, and data-quality pages
21
21
  - Local pricing snapshots, price sync tasks, and budget guardrails
@@ -159,7 +159,7 @@ LlmCostTracker.track_stream(provider: "anthropic", model: "claude-sonnet-4-6") d
159
159
  end
160
160
  ```
161
161
 
162
- Run `bin/rails g llm_cost_tracker:add_streaming` once on existing installs to add the `stream` and `usage_source` columns. Run `bin/rails g llm_cost_tracker:add_provider_response_id` to persist provider-issued response IDs.
162
+ Run `bin/rails g llm_cost_tracker:add_streaming` once on existing installs to add the `stream` and `usage_source` columns. Run `bin/rails g llm_cost_tracker:add_provider_response_id` to persist provider-issued response IDs. Run `bin/rails g llm_cost_tracker:add_usage_breakdown` to add cache-read, cache-write, hidden-output, and pricing-mode columns.
163
163
 
164
164
  ### Manual tracking
165
165
 
@@ -176,6 +176,10 @@ LlmCostTracker.track(
176
176
  )
177
177
  ```
178
178
 
179
+ `input_tokens` is regular non-cache input. Put cache hits in
180
+ `cache_read_input_tokens` and cache writes in `cache_write_input_tokens`; total
181
+ tokens are calculated from the canonical billing breakdown.
182
+
179
183
  ## Configuration
180
184
 
181
185
  ```ruby
@@ -185,17 +189,19 @@ LlmCostTracker.configure do |config|
185
189
  config.default_tags = { app: "my_app", environment: Rails.env }
186
190
 
187
191
  config.monthly_budget = 500.00
192
+ config.daily_budget = 50.00
193
+ config.per_call_budget = 2.00
188
194
  config.budget_exceeded_behavior = :notify # :notify, :raise, :block_requests
189
195
  config.storage_error_behavior = :warn # :ignore, :warn, :raise
190
196
  config.unknown_pricing_behavior = :warn # :ignore, :warn, :raise
191
197
 
192
198
  config.on_budget_exceeded = ->(data) {
193
- SlackNotifier.notify("#alerts", "🚨 LLM budget $#{data[:monthly_total].round(2)} / $#{data[:budget]}")
199
+ SlackNotifier.notify("#alerts", "🚨 LLM #{data[:budget_type]} budget $#{data[:total].round(2)} / $#{data[:budget]}")
194
200
  }
195
201
 
196
202
  config.prices_file = Rails.root.join("config/llm_cost_tracker_prices.yml")
197
203
  config.pricing_overrides = {
198
- "ft:gpt-4o-mini:my-org" => { input: 0.30, cached_input: 0.15, output: 1.20 }
204
+ "ft:gpt-4o-mini:my-org" => { input: 0.30, cache_read_input: 0.15, output: 1.20 }
199
205
  }
200
206
 
201
207
  # Built-in: openrouter.ai, api.deepseek.com
@@ -203,7 +209,9 @@ LlmCostTracker.configure do |config|
203
209
  end
204
210
  ```
205
211
 
206
- Pricing is best effort. OpenRouter-style IDs like `openai/gpt-4o-mini` are normalized to built-in names when possible. Use `prices_file` / `pricing_overrides` for fine-tunes, gateway-specific IDs, enterprise discounts, batch pricing, or models the gem does not know.
212
+ Pricing is best effort. OpenRouter-style IDs like `openai/gpt-4o-mini` are normalized to built-in names when possible. Use `prices_file` / `pricing_overrides` for fine-tunes, gateway-specific IDs, enterprise discounts, alternate pricing modes, or models the gem does not know.
213
+ Provider-specific entries like `openai/gpt-4o-mini` win over model-only entries like `gpt-4o-mini`.
214
+ Pass `pricing_mode: :batch` to use optional mode-specific keys such as `batch_input` / `batch_output`; missing mode-specific keys fall back to standard `input` / `output` rates. The same pattern works for custom modes, for example `contract_input`.
207
215
 
208
216
  `storage_error_behavior = :warn` (default) lets LLM responses continue if storage fails; `:raise` exposes `StorageError#original_error`.
209
217
 
@@ -225,7 +233,7 @@ bin/rails generate llm_cost_tracker:prices
225
233
  {
226
234
  "metadata": { "updated_at": "2026-04-18", "currency": "USD", "unit": "1M tokens" },
227
235
  "models": {
228
- "my-gateway/gpt-4o-mini": { "input": 0.20, "cached_input": 0.10, "output": 0.80 }
236
+ "my-gateway/gpt-4o-mini": { "input": 0.20, "cache_read_input": 0.10, "output": 0.80, "batch_input": 0.10, "batch_output": 0.40 }
229
237
  }
230
238
  }
231
239
  ```
@@ -256,16 +264,22 @@ Large price changes are flagged during sync. If a specific entry is expected to
256
264
  ```ruby
257
265
  config.storage_backend = :active_record
258
266
  config.monthly_budget = 100.00
267
+ config.daily_budget = 10.00
268
+ config.per_call_budget = 1.00
259
269
  config.budget_exceeded_behavior = :block_requests
260
270
  ```
261
271
 
262
272
  - `:notify` — fire `on_budget_exceeded` after an event pushes the month over budget.
263
273
  - `:raise` — record the event, then raise `BudgetExceededError`.
264
- - `:block_requests` — block preflight when the stored monthly total is already over budget; still raises post-response on the event that crosses the line. Needs `:active_record` storage.
274
+ - `:block_requests` — block preflight when the stored monthly or daily total is already over budget; still raises post-response on the event that crosses the line. Needs `:active_record` storage for preflight.
275
+
276
+ `monthly_budget` and `daily_budget` are cumulative ledger limits. `per_call_budget` is a ceiling for a single priced event and runs after the response cost is known.
277
+
278
+ ActiveRecord installs keep `llm_cost_tracker_period_totals` in sync with atomic upserts. Budget preflight reads period rollups instead of scanning `llm_api_calls`.
265
279
 
266
280
  ```ruby
267
281
  rescue LlmCostTracker::BudgetExceededError => e
268
- # e.monthly_total, e.budget, e.last_event
282
+ # e.budget_type, e.total, e.budget, e.monthly_total, e.daily_total, e.call_cost, e.last_event
269
283
  ```
270
284
 
271
285
  `:block_requests` is a **guardrail, not a hard cap**. The preflight and the spend-recording write are separate statements, so under Puma / Sidekiq concurrency multiple workers can all pass the preflight and then collectively overshoot the budget. The setting reliably *stops new requests after the overshoot is visible* — it does not prevent the overshoot itself. For strict quotas use a provider- or gateway-level limit, or a database-backed counter outside this gem.
@@ -343,7 +357,7 @@ On other adapters tags fall back to JSON in a text column. `by_tag` uses JSONB c
343
357
  Upgrade an existing install:
344
358
 
345
359
  ```bash
346
- bin/rails generate llm_cost_tracker:add_monthly_totals # shared monthly budget rollups
360
+ bin/rails generate llm_cost_tracker:add_period_totals # shared budget rollups
347
361
  bin/rails generate llm_cost_tracker:upgrade_tags_to_jsonb # PG: text → jsonb + GIN
348
362
  bin/rails generate llm_cost_tracker:upgrade_cost_precision # widen cost columns
349
363
  bin/rails generate llm_cost_tracker:add_latency_ms
@@ -403,12 +417,14 @@ ActiveSupport::Notifications.subscribe("llm_request.llm_cost_tracker") do |*, pa
403
417
  # payload =>
404
418
  # {
405
419
  # provider: "openai", model: "gpt-4o",
406
- # input_tokens: 150, output_tokens: 42, total_tokens: 192, latency_ms: 248,
420
+ # input_tokens: 150, cache_read_input_tokens: 0, cache_write_input_tokens: 0,
421
+ # hidden_output_tokens: 0, output_tokens: 42, total_tokens: 192, latency_ms: 248,
407
422
  # cost: {
408
- # input_cost: 0.000375, cached_input_cost: 0.0,
409
- # cache_read_input_cost: 0.0, cache_creation_input_cost: 0.0,
410
- # output_cost: 0.00042, total_cost: 0.000795, currency: "USD"
423
+ # input_cost: 0.000375, cache_read_input_cost: 0.0,
424
+ # cache_write_input_cost: 0.0, output_cost: 0.00042,
425
+ # total_cost: 0.000795, currency: "USD"
411
426
  # },
427
+ # pricing_mode: "batch",
412
428
  # tags: { feature: "chat", user_id: 42 },
413
429
  # tracked_at: 2026-04-16 14:30:00 UTC
414
430
  # }
@@ -511,11 +527,12 @@ The gem is designed for multi-threaded hosts — Puma with `max_threads > 1` and
511
527
  - `:block_requests` is a best-effort guardrail, not a hard cap. Concurrent workers can pass preflight simultaneously and collectively overshoot the budget. Use an external quota system if you need a transactional cap.
512
528
  - Streaming capture relies on the provider emitting a final-usage event (OpenAI needs `stream_options: { include_usage: true }`); missing events are recorded with `usage_source: "unknown"` so they surface on the Data Quality page.
513
529
  - `provider_response_id` is stored only when the provider exposes a stable response object ID. Missing IDs stay `nil` and surface on the Data Quality page.
514
- - Anthropic cache TTL variants (1h vs 5min writes) not modeled separately.
515
- - OpenAI reasoning tokens included in output totals; separate reasoning-token attribution not stored.
530
+ - Cache write TTL variants (1h vs 5min writes) not modeled separately.
516
531
 
517
532
  ## Development
518
533
 
534
+ Architecture rules for future changes live in [`docs/architecture.md`](docs/architecture.md).
535
+
519
536
  ```bash
520
537
  bundle install
521
538
  bundle exec rspec
@@ -13,6 +13,16 @@ module LlmCostTracker
13
13
  :stream_column_present,
14
14
  :missing_provider_response_id_count,
15
15
  :provider_response_id_column_present,
16
+ :usage_breakdown_column_present,
17
+ :input_tokens,
18
+ :cache_read_input_tokens,
19
+ :cache_write_input_tokens,
20
+ :output_tokens,
21
+ :hidden_output_tokens,
22
+ :input_cost,
23
+ :cache_read_input_cost,
24
+ :cache_write_input_cost,
25
+ :output_cost,
16
26
  :unknown_pricing_by_model
17
27
  )
18
28
 
@@ -20,32 +30,104 @@ module LlmCostTracker
20
30
  class << self
21
31
  def call(scope: LlmCostTracker::LlmApiCall.all)
22
32
  total = scope.count
23
- latency_present = LlmCostTracker::LlmApiCall.latency_column?
24
- stream_present = LlmCostTracker::LlmApiCall.stream_column?
25
- provider_response_id_present = LlmCostTracker::LlmApiCall.provider_response_id_column?
26
33
 
27
34
  DataQualityStats.new(
28
35
  total_calls: total,
29
36
  unknown_pricing_count: scope.unknown_pricing.count,
30
37
  untagged_calls_count: total - scope.with_json_tags.count,
38
+ **latency_stats(scope),
39
+ **stream_stats(scope),
40
+ **provider_response_id_stats(scope),
41
+ **usage_stats(scope),
42
+ unknown_pricing_by_model: unknown_pricing_by_model(scope)
43
+ )
44
+ end
45
+
46
+ private
47
+
48
+ def latency_stats(scope)
49
+ latency_present = LlmCostTracker::LlmApiCall.latency_column?
50
+
51
+ {
31
52
  missing_latency_count: latency_present ? scope.where(latency_ms: nil).count : nil,
32
- latency_column_present: latency_present,
53
+ latency_column_present: latency_present
54
+ }
55
+ end
56
+
57
+ def stream_stats(scope)
58
+ stream_present = LlmCostTracker::LlmApiCall.stream_column?
59
+
60
+ {
33
61
  streaming_count: stream_present ? scope.streaming.count : nil,
34
- streaming_missing_usage_count: if stream_present && LlmCostTracker::LlmApiCall.usage_source_column?
35
- scope.streaming_missing_usage.count
36
- end,
37
- stream_column_present: stream_present,
38
- missing_provider_response_id_count: (
39
- provider_response_id_present ? scope.missing_provider_response_id.count : nil
40
- ),
41
- provider_response_id_column_present: provider_response_id_present,
42
- unknown_pricing_by_model: scope.unknown_pricing
43
- .group(:model)
44
- .order(Arel.sql("COUNT(*) DESC"))
45
- .count
46
- .first(10)
47
- .to_h
48
- )
62
+ streaming_missing_usage_count: streaming_missing_usage_count(scope, stream_present),
63
+ stream_column_present: stream_present
64
+ }
65
+ end
66
+
67
+ def provider_response_id_stats(scope)
68
+ column_present = LlmCostTracker::LlmApiCall.provider_response_id_column?
69
+
70
+ {
71
+ missing_provider_response_id_count: column_present ? scope.missing_provider_response_id.count : nil,
72
+ provider_response_id_column_present: column_present
73
+ }
74
+ end
75
+
76
+ def usage_stats(scope)
77
+ usage_breakdown_present = LlmCostTracker::LlmApiCall.usage_breakdown_columns?
78
+ usage_breakdown_cost_present = LlmCostTracker::LlmApiCall.usage_breakdown_cost_columns?
79
+ sums = sum_columns(scope, usage_sum_columns(usage_breakdown_present, usage_breakdown_cost_present))
80
+
81
+ {
82
+ usage_breakdown_column_present: usage_breakdown_present,
83
+ input_tokens: sums[:input_tokens].to_i,
84
+ cache_read_input_tokens: usage_breakdown_present ? sums[:cache_read_input_tokens].to_i : nil,
85
+ cache_write_input_tokens: usage_breakdown_present ? sums[:cache_write_input_tokens].to_i : nil,
86
+ output_tokens: sums[:output_tokens].to_i,
87
+ hidden_output_tokens: usage_breakdown_present ? sums[:hidden_output_tokens].to_i : nil,
88
+ input_cost: decimal_sum(sums[:input_cost]),
89
+ cache_read_input_cost: usage_breakdown_cost_present ? decimal_sum(sums[:cache_read_input_cost]) : nil,
90
+ cache_write_input_cost: usage_breakdown_cost_present ? decimal_sum(sums[:cache_write_input_cost]) : nil,
91
+ output_cost: decimal_sum(sums[:output_cost])
92
+ }
93
+ end
94
+
95
+ def usage_sum_columns(usage_breakdown_present, usage_breakdown_cost_present)
96
+ columns = %i[input_tokens output_tokens input_cost output_cost]
97
+ if usage_breakdown_present
98
+ columns += %i[cache_read_input_tokens cache_write_input_tokens hidden_output_tokens]
99
+ end
100
+ columns += %i[cache_read_input_cost cache_write_input_cost] if usage_breakdown_cost_present
101
+ columns
102
+ end
103
+
104
+ def streaming_missing_usage_count(scope, stream_present)
105
+ return unless stream_present && LlmCostTracker::LlmApiCall.usage_source_column?
106
+
107
+ scope.streaming_missing_usage.count
108
+ end
109
+
110
+ def unknown_pricing_by_model(scope)
111
+ scope.unknown_pricing
112
+ .group(:model)
113
+ .order(Arel.sql("COUNT(*) DESC"))
114
+ .count
115
+ .first(10)
116
+ .to_h
117
+ end
118
+
119
+ def sum_columns(scope, columns)
120
+ values = scope.unscope(:order).pick(*columns.map { |column| sum_expression(scope, column) })
121
+
122
+ columns.zip(values).to_h
123
+ end
124
+
125
+ def sum_expression(scope, column)
126
+ Arel.sql("COALESCE(SUM(#{scope.connection.quote_column_name(column)}), 0)")
127
+ end
128
+
129
+ def decimal_sum(value)
130
+ value.to_f.round(8)
49
131
  end
50
132
  end
51
133
  end
@@ -2,6 +2,8 @@
2
2
  <% streaming_count = @stats.streaming_count %>
3
3
  <% streaming_missing_usage = @stats.streaming_missing_usage_count %>
4
4
  <% calls_with_provider_response_id = @stats.provider_response_id_column_present ? total - @stats.missing_provider_response_id_count : nil %>
5
+ <% billable_tokens = @stats.input_tokens + @stats.output_tokens + @stats.cache_read_input_tokens.to_i + @stats.cache_write_input_tokens.to_i %>
6
+ <% hidden_output_share = coverage_percent(@stats.hidden_output_tokens.to_i, @stats.output_tokens) %>
5
7
 
6
8
  <section class="lct-panel lct-toolbar">
7
9
  <div class="lct-toolbar-head">
@@ -118,6 +120,14 @@
118
120
  <p class="lct-stat-sub"><%= percent(coverage_percent(calls_with_provider_response_id, total)) %> of calls</p>
119
121
  </article>
120
122
  <% end %>
123
+
124
+ <% if @stats.usage_breakdown_column_present && @stats.output_tokens.positive? %>
125
+ <article class="lct-stat">
126
+ <p class="lct-stat-label">Hidden output share</p>
127
+ <p class="lct-stat-value"><%= percent(hidden_output_share) %></p>
128
+ <p class="lct-stat-sub"><%= number(@stats.hidden_output_tokens) %> of <%= number(@stats.output_tokens) %> output tokens</p>
129
+ </article>
130
+ <% end %>
121
131
  </div>
122
132
  </div>
123
133
  </section>
@@ -243,6 +253,61 @@
243
253
  </section>
244
254
  </section>
245
255
 
256
+ <% if @stats.usage_breakdown_column_present %>
257
+ <section class="lct-panel">
258
+ <div class="lct-section-head">
259
+ <div>
260
+ <h2 class="lct-section-title">Usage breakdown</h2>
261
+ </div>
262
+ </div>
263
+
264
+ <div class="lct-table-wrap">
265
+ <table class="lct-table lct-table-compact">
266
+ <thead>
267
+ <tr>
268
+ <th>Bucket</th>
269
+ <th class="lct-num">Tokens</th>
270
+ <th class="lct-num">Share</th>
271
+ <th class="lct-num">Cost</th>
272
+ </tr>
273
+ </thead>
274
+ <tbody>
275
+ <tr>
276
+ <td>Regular input</td>
277
+ <td class="lct-num"><%= number(@stats.input_tokens) %></td>
278
+ <td class="lct-num"><%= percent(coverage_percent(@stats.input_tokens, billable_tokens)) %></td>
279
+ <td class="lct-num"><%= money(@stats.input_cost) %></td>
280
+ </tr>
281
+ <tr>
282
+ <td>Cache read input</td>
283
+ <td class="lct-num"><%= number(@stats.cache_read_input_tokens) %></td>
284
+ <td class="lct-num"><%= percent(coverage_percent(@stats.cache_read_input_tokens, billable_tokens)) %></td>
285
+ <td class="lct-num<%= ' lct-num-muted' if @stats.cache_read_input_cost.nil? %>"><%= optional_money(@stats.cache_read_input_cost) %></td>
286
+ </tr>
287
+ <tr>
288
+ <td>Cache write input</td>
289
+ <td class="lct-num"><%= number(@stats.cache_write_input_tokens) %></td>
290
+ <td class="lct-num"><%= percent(coverage_percent(@stats.cache_write_input_tokens, billable_tokens)) %></td>
291
+ <td class="lct-num<%= ' lct-num-muted' if @stats.cache_write_input_cost.nil? %>"><%= optional_money(@stats.cache_write_input_cost) %></td>
292
+ </tr>
293
+ <tr>
294
+ <td>Output</td>
295
+ <td class="lct-num"><%= number(@stats.output_tokens) %></td>
296
+ <td class="lct-num"><%= percent(coverage_percent(@stats.output_tokens, billable_tokens)) %></td>
297
+ <td class="lct-num"><%= money(@stats.output_cost) %></td>
298
+ </tr>
299
+ <tr>
300
+ <td>Hidden output</td>
301
+ <td class="lct-num"><%= number(@stats.hidden_output_tokens) %></td>
302
+ <td class="lct-num"><%= percent(hidden_output_share) %> of output</td>
303
+ <td class="lct-num lct-num-muted">n/a</td>
304
+ </tr>
305
+ </tbody>
306
+ </table>
307
+ </div>
308
+ </section>
309
+ <% end %>
310
+
246
311
  <% unless @stats.unknown_pricing_by_model.empty? %>
247
312
  <section class="lct-panel">
248
313
  <div class="lct-section-head">
@@ -7,33 +7,63 @@ module LlmCostTracker
7
7
  class << self
8
8
  def enforce!
9
9
  config = LlmCostTracker.configuration
10
- return unless config.monthly_budget
11
10
  return unless config.budget_exceeded_behavior == :block_requests
12
11
  return unless config.active_record?
13
12
 
14
- monthly_total = active_record_monthly_total
15
- return unless monthly_total >= config.monthly_budget
16
-
17
- handle_exceeded(monthly_total: monthly_total)
13
+ enforce_period_budget(:monthly, config.monthly_budget)
14
+ enforce_period_budget(:daily, config.daily_budget)
18
15
  end
19
16
 
20
17
  def check!(event)
21
18
  config = LlmCostTracker.configuration
22
- return unless config.monthly_budget
23
19
  return unless event.cost
24
20
 
25
- monthly_total = if config.active_record?
26
- active_record_monthly_total(time: event.tracked_at)
27
- else
28
- event.cost.total_cost
29
- end
30
- return unless monthly_total >= config.monthly_budget
31
-
32
- handle_exceeded(monthly_total: monthly_total, last_event: event)
21
+ check_per_call_budget(event, config)
22
+ check_period_budget(event, config, :daily, config.daily_budget)
23
+ check_period_budget(event, config, :monthly, config.monthly_budget)
33
24
  end
34
25
 
35
26
  private
36
27
 
28
+ def enforce_period_budget(period, budget)
29
+ return unless budget
30
+
31
+ total = active_record_total(period, time: Time.now.utc)
32
+ return unless total >= budget
33
+
34
+ handle_exceeded(budget_type: period, total: total, budget: budget)
35
+ end
36
+
37
+ def check_per_call_budget(event, config)
38
+ budget = config.per_call_budget
39
+ return unless budget
40
+
41
+ call_cost = event.cost.total_cost
42
+ return unless call_cost >= budget
43
+
44
+ handle_exceeded(budget_type: :per_call, total: call_cost, budget: budget, last_event: event)
45
+ end
46
+
47
+ def check_period_budget(event, config, period, budget)
48
+ return unless budget
49
+
50
+ total = if config.active_record?
51
+ active_record_total(period, time: event.tracked_at)
52
+ else
53
+ event.cost.total_cost
54
+ end
55
+ return unless total >= budget
56
+
57
+ handle_exceeded(budget_type: period, total: total, budget: budget, last_event: event)
58
+ end
59
+
60
+ def active_record_total(period, time:)
61
+ case period
62
+ when :monthly then active_record_monthly_total(time: time)
63
+ when :daily then active_record_daily_total(time: time)
64
+ end
65
+ end
66
+
37
67
  def active_record_monthly_total(time: Time.now.utc)
38
68
  require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
39
69
  require_relative "storage/active_record_store" unless defined?(LlmCostTracker::Storage::ActiveRecordStore)
@@ -43,26 +73,50 @@ module LlmCostTracker
43
73
  raise Error, "ActiveRecord storage requires the active_record gem: #{e.message}"
44
74
  end
45
75
 
46
- def handle_exceeded(monthly_total:, last_event: nil)
76
+ def active_record_daily_total(time: Time.now.utc)
77
+ require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
78
+ require_relative "storage/active_record_store" unless defined?(LlmCostTracker::Storage::ActiveRecordStore)
79
+
80
+ LlmCostTracker::Storage::ActiveRecordStore.daily_total(time: time)
81
+ rescue LoadError => e
82
+ raise Error, "ActiveRecord storage requires the active_record gem: #{e.message}"
83
+ end
84
+
85
+ def handle_exceeded(budget_type:, total:, budget:, last_event: nil)
47
86
  config = LlmCostTracker.configuration
48
- payload = {
49
- monthly_total: monthly_total,
50
- budget: config.monthly_budget,
87
+ payload = budget_payload(
88
+ budget_type: budget_type,
89
+ total: total,
90
+ budget: budget,
51
91
  last_event: last_event
52
- }
92
+ )
53
93
 
54
- if notify_exceeded?(config, monthly_total: monthly_total, last_event: last_event)
94
+ if notify_exceeded?(config, budget_type: budget_type, total: total, budget: budget, last_event: last_event)
55
95
  config.on_budget_exceeded&.call(payload)
56
96
  end
57
97
  raise BudgetExceededError.new(**payload) if raise_on_exceeded?(config)
58
98
  end
59
99
 
60
- def notify_exceeded?(config, monthly_total:, last_event:)
100
+ def budget_payload(budget_type:, total:, budget:, last_event:)
101
+ payload = {
102
+ budget_type: budget_type,
103
+ total: total,
104
+ budget: budget,
105
+ last_event: last_event
106
+ }
107
+ payload[:monthly_total] = total if budget_type == :monthly
108
+ payload[:daily_total] = total if budget_type == :daily
109
+ payload[:call_cost] = total if budget_type == :per_call
110
+ payload
111
+ end
112
+
113
+ def notify_exceeded?(config, budget_type:, total:, budget:, last_event:)
61
114
  return false unless config.on_budget_exceeded
62
115
  return true unless config.budget_exceeded_behavior == :notify
63
116
  return true unless last_event&.cost
117
+ return true if budget_type == :per_call
64
118
 
65
- monthly_total - last_event.cost.total_cost < config.monthly_budget
119
+ total - last_event.cost.total_cost < budget
66
120
  end
67
121
 
68
122
  def raise_on_exceeded?(config)
@@ -19,6 +19,8 @@ module LlmCostTracker
19
19
  custom_storage
20
20
  on_budget_exceeded
21
21
  monthly_budget
22
+ daily_budget
23
+ per_call_budget
22
24
  log_level
23
25
  prices_file
24
26
  ].freeze
@@ -48,6 +50,8 @@ module LlmCostTracker
48
50
  @default_tags = {}
49
51
  @on_budget_exceeded = nil
50
52
  @monthly_budget = nil
53
+ @daily_budget = nil
54
+ @per_call_budget = nil
51
55
  self.budget_exceeded_behavior = :notify
52
56
  self.storage_error_behavior = :warn
53
57
  self.unknown_pricing_behavior = :warn
@@ -3,9 +3,8 @@
3
3
  module LlmCostTracker
4
4
  Cost = Data.define(
5
5
  :input_cost,
6
- :cached_input_cost,
7
6
  :cache_read_input_cost,
8
- :cache_creation_input_cost,
7
+ :cache_write_input_cost,
9
8
  :output_cost,
10
9
  :total_cost,
11
10
  :currency
@@ -6,14 +6,33 @@ module LlmCostTracker
6
6
  class InvalidFilterError < Error; end
7
7
 
8
8
  class BudgetExceededError < Error
9
- attr_reader :monthly_total, :budget, :last_event
9
+ attr_reader :monthly_total, :daily_total, :call_cost, :total, :budget, :budget_type, :last_event
10
10
 
11
- def initialize(monthly_total:, budget:, last_event: nil)
11
+ def initialize(budget:, last_event: nil, budget_type: nil, total: nil, monthly_total: nil, daily_total: nil,
12
+ call_cost: nil)
12
13
  @monthly_total = monthly_total
14
+ @daily_total = daily_total
15
+ @call_cost = call_cost
16
+ @total = total || monthly_total || daily_total || call_cost
13
17
  @budget = budget
18
+ @budget_type = budget_type || inferred_budget_type
14
19
  @last_event = last_event
15
20
 
16
- super("LLM monthly budget exceeded: $#{format('%.6f', monthly_total)} / $#{format('%.6f', budget)}")
21
+ super("LLM #{budget_label} budget exceeded: $#{format('%.6f', @total)} / $#{format('%.6f', budget)}")
22
+ end
23
+
24
+ private
25
+
26
+ def inferred_budget_type
27
+ return :monthly if monthly_total
28
+ return :daily if daily_total
29
+ return :per_call if call_cost
30
+
31
+ :unknown
32
+ end
33
+
34
+ def budget_label
35
+ budget_type.to_s.tr("_", "-")
17
36
  end
18
37
  end
19
38
 
@@ -7,6 +7,10 @@ module LlmCostTracker
7
7
  :input_tokens,
8
8
  :output_tokens,
9
9
  :total_tokens,
10
+ :cache_read_input_tokens,
11
+ :cache_write_input_tokens,
12
+ :hidden_output_tokens,
13
+ :pricing_mode,
10
14
  :cost,
11
15
  :tags,
12
16
  :latency_ms,