RubyGems - llm_cost_tracker - Versions diffs - 0.3.2 → 0.4.0 - Mend

llm_cost_tracker 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

checksums.yaml +4 -4
data/CHANGELOG.md +35 -0
data/README.md +34 -14
data/app/services/llm_cost_tracker/dashboard/data_quality.rb +101 -19
data/app/views/llm_cost_tracker/data_quality/index.html.erb +65 -0
data/lib/llm_cost_tracker/budget.rb +85 -21
data/lib/llm_cost_tracker/configuration.rb +4 -0
data/lib/llm_cost_tracker/cost.rb +1 -2
data/lib/llm_cost_tracker/errors.rb +22 -3
data/lib/llm_cost_tracker/event.rb +4 -0
data/lib/llm_cost_tracker/event_metadata.rb +21 -15
data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_period_totals_generator.rb +29 -0
data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_usage_breakdown_generator.rb +29 -0
data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +66 -0
data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_usage_breakdown_to_llm_api_calls.rb.erb +29 -0
data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +15 -0
data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +3 -1
data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +11 -3
data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +1 -0
data/lib/llm_cost_tracker/middleware/faraday.rb +27 -9
data/lib/llm_cost_tracker/parsed_usage.rb +16 -7
data/lib/llm_cost_tracker/parsers/anthropic.rb +7 -6
data/lib/llm_cost_tracker/parsers/base.rb +2 -1
data/lib/llm_cost_tracker/parsers/gemini.rb +5 -2
data/lib/llm_cost_tracker/parsers/openai_usage.rb +18 -5
data/lib/llm_cost_tracker/period_total.rb +9 -0
data/lib/llm_cost_tracker/price_registry.rb +14 -4
data/lib/llm_cost_tracker/price_sync/merger.rb +1 -1
data/lib/llm_cost_tracker/price_sync/raw_price.rb +3 -5
data/lib/llm_cost_tracker/price_sync/sources/litellm.rb +2 -3
data/lib/llm_cost_tracker/price_sync/sources/open_router.rb +2 -3
data/lib/llm_cost_tracker/prices.json +30 -30
data/lib/llm_cost_tracker/pricing.rb +44 -32
data/lib/llm_cost_tracker/railtie.rb +2 -0
data/lib/llm_cost_tracker/storage/active_record_rollups.rb +122 -0
data/lib/llm_cost_tracker/storage/active_record_store.rb +38 -13
data/lib/llm_cost_tracker/stream_collector.rb +5 -3
data/lib/llm_cost_tracker/tags_column.rb +19 -0
data/lib/llm_cost_tracker/tracker.rb +58 -32
data/lib/llm_cost_tracker/unknown_pricing.rb +14 -0
data/lib/llm_cost_tracker/usage_breakdown.rb +30 -0
data/lib/llm_cost_tracker/version.rb +1 -1
data/lib/llm_cost_tracker.rb +12 -3
metadata +10 -4
data/llm_cost_tracker.gemspec +0 -50

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 6952282e6f93b4e5658ef9d2b9527d2a332cb2d6f483da25540c3a0d6672ed9b
-  data.tar.gz: e66eaaeb99698abf9c0ff9e3f1305e6bb27a8b6c25355e94bce4baec5f5d3a50
+  metadata.gz: ccb9a8365f4a06026a4352385efa1318ac59ce403cb848e0c9aff992fc80f64c
+  data.tar.gz: f21503cd322e923dc5bde0139cc61bc1547cef01eac59fe7a3861e1ab33e9860
 SHA512:
-  metadata.gz: '078d695498ed6f254a700ccb3381ace3feaa1f4880691a1a69be4bb435097202ecf28f2cbf065202a543186bdd3894aaedcc44bfacc2d2ffa25a54ddf6d1cc76'
-  data.tar.gz: 2638ae3c579bd2c0f71a73b06719eb0a35d7b82e8614a74c5100f41b69693babfd3b613ecf18e7f6e7ea33210222432efa5a7ca718325c4c8fd12be9b8ab806e
+  metadata.gz: 304ab6de6404f070b21b1dd72ce9eae2b44fb2fc7845eae8831a04971ed2b8ec2b6f740bc082fb36cfa42d90f0be59ab5800d43d72b68f04918a113b6d7d8cbd
+  data.tar.gz: afa2e92a99062bb1e0b4a00ab1d0762ca688f1890e0d76a29801881e2319e68db217c036d8f8c5d99558b580d5d4c039f8b3334283631763ee093fd12d369329

data/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,41 @@ Format: [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). Versioning: [S
 ## [Unreleased]
+## [0.4.0] - 2026-04-24
+### Changed
+- BREAKING: Canonical usage and pricing now use `cache_read_input` / `cache_write_input` instead of `cached_input` / `cache_creation_input`.
+- BREAKING: `Pricing.cost_for` now requires `provider:` and prefers provider-specific price entries before model-only entries.
+- BREAKING: Fresh ActiveRecord installs include cache-read, cache-write, and hidden-output token/cost breakdown columns.
+- BREAKING: ActiveRecord budget rollups now use `llm_cost_tracker_period_totals`.
+- BREAKING: `llm_cost_tracker:add_monthly_totals` was replaced by `llm_cost_tracker:add_period_totals`.
+- `llm_cost_tracker:add_usage_breakdown` generator for upgrading existing ActiveRecord installs.
+- `llm_cost_tracker:add_period_totals` generator for upgrading existing ActiveRecord installs.
+- Generic `pricing_mode` support with mode-prefixed local price keys.
+- Data Quality now shows usage bucket totals and hidden-output share.
+- Daily budget and per-call budget guardrails.
+## [0.3.3] - 2026-04-24
+### Added
+- Monthly rollup totals for ActiveRecord budget checks, plus `llm_cost_tracker:add_monthly_totals` for upgrading existing installs.
+### Changed
+- ActiveRecord monthly totals now update through a single atomic upsert.
+- Faraday stream capture overflow now records `usage_source: "unknown"` instead of dropping the tracked event.
+- Budget `:notify` callbacks now fire only on the first event that crosses the monthly limit.
+### Fixed
+- Treat `config.enabled = false` as a global kill switch for direct `track` and `track_stream` calls too.
+- Deduplicate unknown-pricing warnings per model.
+- Detect streaming requests from parsed JSON instead of raw body substring matching.
+- Cap automatic SSE capture to avoid unbounded memory growth on large streaming responses.
+- Warn that the generated PostgreSQL `tags -> jsonb` upgrade migration rewrites large tables and should run in a maintenance window.
 ## [0.3.2] - 2026-04-22
 ### Added

data/README.md CHANGED Viewed

@@ -15,7 +15,7 @@ Every Rails app with LLM integrations eventually runs into the same question: wh
 ## What You Get
-- A local ActiveRecord ledger of provider, model, tokens, cost, latency, tags, streaming usage, and provider response IDs
+- A local ActiveRecord ledger of provider, model, usage breakdown, cost, latency, tags, streaming usage, and provider response IDs
 - Faraday middleware plus explicit `track` / `track_stream` helpers for non-Faraday clients
 - Server-rendered Rails dashboard with overview, calls, tags, CSV export, and data-quality pages
 - Local pricing snapshots, price sync tasks, and budget guardrails
@@ -159,7 +159,7 @@ LlmCostTracker.track_stream(provider: "anthropic", model: "claude-sonnet-4-6") d
 end
 ```
-Run `bin/rails g llm_cost_tracker:add_streaming` once on existing installs to add the `stream` and `usage_source` columns. Run `bin/rails g llm_cost_tracker:add_provider_response_id` to persist provider-issued response IDs.
+Run `bin/rails g llm_cost_tracker:add_streaming` once on existing installs to add the `stream` and `usage_source` columns. Run `bin/rails g llm_cost_tracker:add_provider_response_id` to persist provider-issued response IDs. Run `bin/rails g llm_cost_tracker:add_usage_breakdown` to add cache-read, cache-write, hidden-output, and pricing-mode columns.
 ### Manual tracking
@@ -176,6 +176,10 @@ LlmCostTracker.track(
 )
 ```
+`input_tokens` is regular non-cache input. Put cache hits in
+`cache_read_input_tokens` and cache writes in `cache_write_input_tokens`; total
+tokens are calculated from the canonical billing breakdown.
 ## Configuration
 ```ruby
@@ -185,17 +189,19 @@ LlmCostTracker.configure do |config|
   config.default_tags = { app: "my_app", environment: Rails.env }
   config.monthly_budget = 500.00
+  config.daily_budget = 50.00
+  config.per_call_budget = 2.00
   config.budget_exceeded_behavior = :notify  # :notify, :raise, :block_requests
   config.storage_error_behavior   = :warn    # :ignore, :warn, :raise
   config.unknown_pricing_behavior = :warn    # :ignore, :warn, :raise
   config.on_budget_exceeded = ->(data) {
-    SlackNotifier.notify("#alerts", "🚨 LLM budget $#{data[:monthly_total].round(2)} / $#{data[:budget]}")
+    SlackNotifier.notify("#alerts", "🚨 LLM #{data[:budget_type]} budget $#{data[:total].round(2)} / $#{data[:budget]}")
   }
   config.prices_file = Rails.root.join("config/llm_cost_tracker_prices.yml")
   config.pricing_overrides = {
-    "ft:gpt-4o-mini:my-org" => { input: 0.30, cached_input: 0.15, output: 1.20 }
+    "ft:gpt-4o-mini:my-org" => { input: 0.30, cache_read_input: 0.15, output: 1.20 }
   }
   # Built-in: openrouter.ai, api.deepseek.com
@@ -203,7 +209,9 @@ LlmCostTracker.configure do |config|
 end
 ```
-Pricing is best effort. OpenRouter-style IDs like `openai/gpt-4o-mini` are normalized to built-in names when possible. Use `prices_file` / `pricing_overrides` for fine-tunes, gateway-specific IDs, enterprise discounts, batch pricing, or models the gem does not know.
+Pricing is best effort. OpenRouter-style IDs like `openai/gpt-4o-mini` are normalized to built-in names when possible. Use `prices_file` / `pricing_overrides` for fine-tunes, gateway-specific IDs, enterprise discounts, alternate pricing modes, or models the gem does not know.
+Provider-specific entries like `openai/gpt-4o-mini` win over model-only entries like `gpt-4o-mini`.
+Pass `pricing_mode: :batch` to use optional mode-specific keys such as `batch_input` / `batch_output`; missing mode-specific keys fall back to standard `input` / `output` rates. The same pattern works for custom modes, for example `contract_input`.
 `storage_error_behavior = :warn` (default) lets LLM responses continue if storage fails; `:raise` exposes `StorageError#original_error`.
@@ -225,7 +233,7 @@ bin/rails generate llm_cost_tracker:prices
 {
   "metadata": { "updated_at": "2026-04-18", "currency": "USD", "unit": "1M tokens" },
   "models": {
-    "my-gateway/gpt-4o-mini": { "input": 0.20, "cached_input": 0.10, "output": 0.80 }
+    "my-gateway/gpt-4o-mini": { "input": 0.20, "cache_read_input": 0.10, "output": 0.80, "batch_input": 0.10, "batch_output": 0.40 }
   }
 }
 ```
@@ -256,16 +264,22 @@ Large price changes are flagged during sync. If a specific entry is expected to
 ```ruby
 config.storage_backend = :active_record
 config.monthly_budget = 100.00
+config.daily_budget = 10.00
+config.per_call_budget = 1.00
 config.budget_exceeded_behavior = :block_requests
 ```
 - `:notify` — fire `on_budget_exceeded` after an event pushes the month over budget.
 - `:raise` — record the event, then raise `BudgetExceededError`.
-- `:block_requests` — block preflight when the stored monthly total is already over budget; still raises post-response on the event that crosses the line. Needs `:active_record` storage.
+- `:block_requests` — block preflight when the stored monthly or daily total is already over budget; still raises post-response on the event that crosses the line. Needs `:active_record` storage for preflight.
+`monthly_budget` and `daily_budget` are cumulative ledger limits. `per_call_budget` is a ceiling for a single priced event and runs after the response cost is known.
+ActiveRecord installs keep `llm_cost_tracker_period_totals` in sync with atomic upserts. Budget preflight reads period rollups instead of scanning `llm_api_calls`.
 ```ruby
 rescue LlmCostTracker::BudgetExceededError => e
-  # e.monthly_total, e.budget, e.last_event
+  # e.budget_type, e.total, e.budget, e.monthly_total, e.daily_total, e.call_cost, e.last_event
 ```
 `:block_requests` is a **guardrail, not a hard cap**. The preflight and the spend-recording write are separate statements, so under Puma / Sidekiq concurrency multiple workers can all pass the preflight and then collectively overshoot the budget. The setting reliably *stops new requests after the overshoot is visible* — it does not prevent the overshoot itself. For strict quotas use a provider- or gateway-level limit, or a database-backed counter outside this gem.
@@ -343,12 +357,15 @@ On other adapters tags fall back to JSON in a text column. `by_tag` uses JSONB c
 Upgrade an existing install:
 ```bash
+bin/rails generate llm_cost_tracker:add_period_totals    # shared budget rollups
 bin/rails generate llm_cost_tracker:upgrade_tags_to_jsonb   # PG: text → jsonb + GIN
 bin/rails generate llm_cost_tracker:upgrade_cost_precision  # widen cost columns
 bin/rails generate llm_cost_tracker:add_latency_ms
 bin/rails db:migrate
 ```
+On PostgreSQL, the generated `upgrade_tags_to_jsonb` migration rewrites `llm_api_calls`. Run it during a maintenance window on large tables, or replace it with a two-phase backfill for zero-downtime deploys.
 ## Mounting the dashboard
 Optional Rails Engine. Plain ERB, no JavaScript framework, no asset pipeline required. Requires Rails 7.1+; the core middleware works without Rails.
@@ -400,12 +417,14 @@ ActiveSupport::Notifications.subscribe("llm_request.llm_cost_tracker") do |*, pa
   # payload =>
   # {
   #   provider: "openai", model: "gpt-4o",
-  #   input_tokens: 150, output_tokens: 42, total_tokens: 192, latency_ms: 248,
+  #   input_tokens: 150, cache_read_input_tokens: 0, cache_write_input_tokens: 0,
+  #   hidden_output_tokens: 0, output_tokens: 42, total_tokens: 192, latency_ms: 248,
   #   cost: {
-  #     input_cost: 0.000375, cached_input_cost: 0.0,
-  #     cache_read_input_cost: 0.0, cache_creation_input_cost: 0.0,
-  #     output_cost: 0.00042, total_cost: 0.000795, currency: "USD"
+  #     input_cost: 0.000375, cache_read_input_cost: 0.0,
+  #     cache_write_input_cost: 0.0, output_cost: 0.00042,
+  #     total_cost: 0.000795, currency: "USD"
   #   },
+  #   pricing_mode: "batch",
   #   tags: { feature: "chat", user_id: 42 },
   #   tracked_at: 2026-04-16 14:30:00 UTC
   # }
@@ -508,11 +527,12 @@ The gem is designed for multi-threaded hosts — Puma with `max_threads > 1` and
 - `:block_requests` is a best-effort guardrail, not a hard cap. Concurrent workers can pass preflight simultaneously and collectively overshoot the budget. Use an external quota system if you need a transactional cap.
 - Streaming capture relies on the provider emitting a final-usage event (OpenAI needs `stream_options: { include_usage: true }`); missing events are recorded with `usage_source: "unknown"` so they surface on the Data Quality page.
 - `provider_response_id` is stored only when the provider exposes a stable response object ID. Missing IDs stay `nil` and surface on the Data Quality page.
-- Anthropic cache TTL variants (1h vs 5min writes) not modeled separately.
-- OpenAI reasoning tokens included in output totals; separate reasoning-token attribution not stored.
+- Cache write TTL variants (1h vs 5min writes) not modeled separately.
 ## Development
+Architecture rules for future changes live in [`docs/architecture.md`](docs/architecture.md).
 ```bash
 bundle install
 bundle exec rspec

data/app/services/llm_cost_tracker/dashboard/data_quality.rb CHANGED Viewed

@@ -13,6 +13,16 @@ module LlmCostTracker
       :stream_column_present,
       :missing_provider_response_id_count,
       :provider_response_id_column_present,
+      :usage_breakdown_column_present,
+      :input_tokens,
+      :cache_read_input_tokens,
+      :cache_write_input_tokens,
+      :output_tokens,
+      :hidden_output_tokens,
+      :input_cost,
+      :cache_read_input_cost,
+      :cache_write_input_cost,
+      :output_cost,
       :unknown_pricing_by_model
     )
@@ -20,32 +30,104 @@ module LlmCostTracker
       class << self
         def call(scope: LlmCostTracker::LlmApiCall.all)
           total = scope.count
-          latency_present = LlmCostTracker::LlmApiCall.latency_column?
-          stream_present = LlmCostTracker::LlmApiCall.stream_column?
-          provider_response_id_present = LlmCostTracker::LlmApiCall.provider_response_id_column?
           DataQualityStats.new(
             total_calls: total,
             unknown_pricing_count: scope.unknown_pricing.count,
             untagged_calls_count: total - scope.with_json_tags.count,
+            **latency_stats(scope),
+            **stream_stats(scope),
+            **provider_response_id_stats(scope),
+            **usage_stats(scope),
+            unknown_pricing_by_model: unknown_pricing_by_model(scope)
+          )
+        end
+        private
+        def latency_stats(scope)
+          latency_present = LlmCostTracker::LlmApiCall.latency_column?
+          {
             missing_latency_count: latency_present ? scope.where(latency_ms: nil).count : nil,
-            latency_column_present: latency_present,
+            latency_column_present: latency_present
+          }
+        end
+        def stream_stats(scope)
+          stream_present = LlmCostTracker::LlmApiCall.stream_column?
+          {
             streaming_count: stream_present ? scope.streaming.count : nil,
-            streaming_missing_usage_count: if stream_present && LlmCostTracker::LlmApiCall.usage_source_column?
-                                             scope.streaming_missing_usage.count
-                                           end,
-            stream_column_present: stream_present,
-            missing_provider_response_id_count: (
-              provider_response_id_present ? scope.missing_provider_response_id.count : nil
-            ),
-            provider_response_id_column_present: provider_response_id_present,
-            unknown_pricing_by_model: scope.unknown_pricing
-                                      .group(:model)
-                                      .order(Arel.sql("COUNT(*) DESC"))
-                                      .count
-                                      .first(10)
-                                      .to_h
-          )
+            streaming_missing_usage_count: streaming_missing_usage_count(scope, stream_present),
+            stream_column_present: stream_present
+          }
+        end
+        def provider_response_id_stats(scope)
+          column_present = LlmCostTracker::LlmApiCall.provider_response_id_column?
+          {
+            missing_provider_response_id_count: column_present ? scope.missing_provider_response_id.count : nil,
+            provider_response_id_column_present: column_present
+          }
+        end
+        def usage_stats(scope)
+          usage_breakdown_present = LlmCostTracker::LlmApiCall.usage_breakdown_columns?
+          usage_breakdown_cost_present = LlmCostTracker::LlmApiCall.usage_breakdown_cost_columns?
+          sums = sum_columns(scope, usage_sum_columns(usage_breakdown_present, usage_breakdown_cost_present))
+          {
+            usage_breakdown_column_present: usage_breakdown_present,
+            input_tokens: sums[:input_tokens].to_i,
+            cache_read_input_tokens: usage_breakdown_present ? sums[:cache_read_input_tokens].to_i : nil,
+            cache_write_input_tokens: usage_breakdown_present ? sums[:cache_write_input_tokens].to_i : nil,
+            output_tokens: sums[:output_tokens].to_i,
+            hidden_output_tokens: usage_breakdown_present ? sums[:hidden_output_tokens].to_i : nil,
+            input_cost: decimal_sum(sums[:input_cost]),
+            cache_read_input_cost: usage_breakdown_cost_present ? decimal_sum(sums[:cache_read_input_cost]) : nil,
+            cache_write_input_cost: usage_breakdown_cost_present ? decimal_sum(sums[:cache_write_input_cost]) : nil,
+            output_cost: decimal_sum(sums[:output_cost])
+          }
+        end
+        def usage_sum_columns(usage_breakdown_present, usage_breakdown_cost_present)
+          columns = %i[input_tokens output_tokens input_cost output_cost]
+          if usage_breakdown_present
+            columns += %i[cache_read_input_tokens cache_write_input_tokens hidden_output_tokens]
+          end
+          columns += %i[cache_read_input_cost cache_write_input_cost] if usage_breakdown_cost_present
+          columns
+        end
+        def streaming_missing_usage_count(scope, stream_present)
+          return unless stream_present && LlmCostTracker::LlmApiCall.usage_source_column?
+          scope.streaming_missing_usage.count
+        end
+        def unknown_pricing_by_model(scope)
+          scope.unknown_pricing
+               .group(:model)
+               .order(Arel.sql("COUNT(*) DESC"))
+               .count
+               .first(10)
+               .to_h
+        end
+        def sum_columns(scope, columns)
+          values = scope.unscope(:order).pick(*columns.map { |column| sum_expression(scope, column) })
+          columns.zip(values).to_h
+        end
+        def sum_expression(scope, column)
+          Arel.sql("COALESCE(SUM(#{scope.connection.quote_column_name(column)}), 0)")
+        end
+        def decimal_sum(value)
+          value.to_f.round(8)
         end
       end
     end

data/app/views/llm_cost_tracker/data_quality/index.html.erb CHANGED Viewed

@@ -2,6 +2,8 @@
 <% streaming_count = @stats.streaming_count %>
 <% streaming_missing_usage = @stats.streaming_missing_usage_count %>
 <% calls_with_provider_response_id = @stats.provider_response_id_column_present ? total - @stats.missing_provider_response_id_count : nil %>
+<% billable_tokens = @stats.input_tokens + @stats.output_tokens + @stats.cache_read_input_tokens.to_i + @stats.cache_write_input_tokens.to_i %>
+<% hidden_output_share = coverage_percent(@stats.hidden_output_tokens.to_i, @stats.output_tokens) %>
 <section class="lct-panel lct-toolbar">
   <div class="lct-toolbar-head">
@@ -118,6 +120,14 @@
             <p class="lct-stat-sub"><%= percent(coverage_percent(calls_with_provider_response_id, total)) %> of calls</p>
           </article>
         <% end %>
+        <% if @stats.usage_breakdown_column_present && @stats.output_tokens.positive? %>
+          <article class="lct-stat">
+            <p class="lct-stat-label">Hidden output share</p>
+            <p class="lct-stat-value"><%= percent(hidden_output_share) %></p>
+            <p class="lct-stat-sub"><%= number(@stats.hidden_output_tokens) %> of <%= number(@stats.output_tokens) %> output tokens</p>
+          </article>
+        <% end %>
       </div>
     </div>
   </section>
@@ -243,6 +253,61 @@
     </section>
   </section>
+  <% if @stats.usage_breakdown_column_present %>
+    <section class="lct-panel">
+      <div class="lct-section-head">
+        <div>
+          <h2 class="lct-section-title">Usage breakdown</h2>
+        </div>
+      </div>
+      <div class="lct-table-wrap">
+        <table class="lct-table lct-table-compact">
+          <thead>
+            <tr>
+              <th>Bucket</th>
+              <th class="lct-num">Tokens</th>
+              <th class="lct-num">Share</th>
+              <th class="lct-num">Cost</th>
+            </tr>
+          </thead>
+          <tbody>
+            <tr>
+              <td>Regular input</td>
+              <td class="lct-num"><%= number(@stats.input_tokens) %></td>
+              <td class="lct-num"><%= percent(coverage_percent(@stats.input_tokens, billable_tokens)) %></td>
+              <td class="lct-num"><%= money(@stats.input_cost) %></td>
+            </tr>
+            <tr>
+              <td>Cache read input</td>
+              <td class="lct-num"><%= number(@stats.cache_read_input_tokens) %></td>
+              <td class="lct-num"><%= percent(coverage_percent(@stats.cache_read_input_tokens, billable_tokens)) %></td>
+              <td class="lct-num<%= ' lct-num-muted' if @stats.cache_read_input_cost.nil? %>"><%= optional_money(@stats.cache_read_input_cost) %></td>
+            </tr>
+            <tr>
+              <td>Cache write input</td>
+              <td class="lct-num"><%= number(@stats.cache_write_input_tokens) %></td>
+              <td class="lct-num"><%= percent(coverage_percent(@stats.cache_write_input_tokens, billable_tokens)) %></td>
+              <td class="lct-num<%= ' lct-num-muted' if @stats.cache_write_input_cost.nil? %>"><%= optional_money(@stats.cache_write_input_cost) %></td>
+            </tr>
+            <tr>
+              <td>Output</td>
+              <td class="lct-num"><%= number(@stats.output_tokens) %></td>
+              <td class="lct-num"><%= percent(coverage_percent(@stats.output_tokens, billable_tokens)) %></td>
+              <td class="lct-num"><%= money(@stats.output_cost) %></td>
+            </tr>
+            <tr>
+              <td>Hidden output</td>
+              <td class="lct-num"><%= number(@stats.hidden_output_tokens) %></td>
+              <td class="lct-num"><%= percent(hidden_output_share) %> of output</td>
+              <td class="lct-num lct-num-muted">n/a</td>
+            </tr>
+          </tbody>
+        </table>
+      </div>
+    </section>
+  <% end %>
   <% unless @stats.unknown_pricing_by_model.empty? %>
     <section class="lct-panel">
       <div class="lct-section-head">

data/lib/llm_cost_tracker/budget.rb CHANGED Viewed

@@ -7,52 +7,116 @@ module LlmCostTracker
     class << self
       def enforce!
         config = LlmCostTracker.configuration
-        return unless config.monthly_budget
         return unless config.budget_exceeded_behavior == :block_requests
         return unless config.active_record?
-        monthly_total = active_record_monthly_total
-        return unless monthly_total >= config.monthly_budget
-        handle_exceeded(monthly_total: monthly_total)
+        enforce_period_budget(:monthly, config.monthly_budget)
+        enforce_period_budget(:daily, config.daily_budget)
       end
       def check!(event)
         config = LlmCostTracker.configuration
-        return unless config.monthly_budget
         return unless event.cost
-        monthly_total = if config.active_record?
-                          active_record_monthly_total
-                        else
-                          event.cost.total_cost
-                        end
-        return unless monthly_total >= config.monthly_budget
-        handle_exceeded(monthly_total: monthly_total, last_event: event)
+        check_per_call_budget(event, config)
+        check_period_budget(event, config, :daily, config.daily_budget)
+        check_period_budget(event, config, :monthly, config.monthly_budget)
       end
       private
-      def active_record_monthly_total
+      def enforce_period_budget(period, budget)
+        return unless budget
+        total = active_record_total(period, time: Time.now.utc)
+        return unless total >= budget
+        handle_exceeded(budget_type: period, total: total, budget: budget)
+      end
+      def check_per_call_budget(event, config)
+        budget = config.per_call_budget
+        return unless budget
+        call_cost = event.cost.total_cost
+        return unless call_cost >= budget
+        handle_exceeded(budget_type: :per_call, total: call_cost, budget: budget, last_event: event)
+      end
+      def check_period_budget(event, config, period, budget)
+        return unless budget
+        total = if config.active_record?
+                  active_record_total(period, time: event.tracked_at)
+                else
+                  event.cost.total_cost
+                end
+        return unless total >= budget
+        handle_exceeded(budget_type: period, total: total, budget: budget, last_event: event)
+      end
+      def active_record_total(period, time:)
+        case period
+        when :monthly then active_record_monthly_total(time: time)
+        when :daily   then active_record_daily_total(time: time)
+        end
+      end
+      def active_record_monthly_total(time: Time.now.utc)
         require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
         require_relative "storage/active_record_store" unless defined?(LlmCostTracker::Storage::ActiveRecordStore)
-        LlmCostTracker::Storage::ActiveRecordStore.monthly_total
+        LlmCostTracker::Storage::ActiveRecordStore.monthly_total(time: time)
       rescue LoadError => e
         raise Error, "ActiveRecord storage requires the active_record gem: #{e.message}"
       end
-      def handle_exceeded(monthly_total:, last_event: nil)
+      def active_record_daily_total(time: Time.now.utc)
+        require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
+        require_relative "storage/active_record_store" unless defined?(LlmCostTracker::Storage::ActiveRecordStore)
+        LlmCostTracker::Storage::ActiveRecordStore.daily_total(time: time)
+      rescue LoadError => e
+        raise Error, "ActiveRecord storage requires the active_record gem: #{e.message}"
+      end
+      def handle_exceeded(budget_type:, total:, budget:, last_event: nil)
         config = LlmCostTracker.configuration
+        payload = budget_payload(
+          budget_type: budget_type,
+          total: total,
+          budget: budget,
+          last_event: last_event
+        )
+        if notify_exceeded?(config, budget_type: budget_type, total: total, budget: budget, last_event: last_event)
+          config.on_budget_exceeded&.call(payload)
+        end
+        raise BudgetExceededError.new(**payload) if raise_on_exceeded?(config)
+      end
+      def budget_payload(budget_type:, total:, budget:, last_event:)
         payload = {
-          monthly_total: monthly_total,
-          budget: config.monthly_budget,
+          budget_type: budget_type,
+          total: total,
+          budget: budget,
           last_event: last_event
         }
+        payload[:monthly_total] = total if budget_type == :monthly
+        payload[:daily_total] = total if budget_type == :daily
+        payload[:call_cost] = total if budget_type == :per_call
+        payload
+      end
-        config.on_budget_exceeded&.call(payload)
-        raise BudgetExceededError.new(**payload) if raise_on_exceeded?(config)
+      def notify_exceeded?(config, budget_type:, total:, budget:, last_event:)
+        return false unless config.on_budget_exceeded
+        return true unless config.budget_exceeded_behavior == :notify
+        return true unless last_event&.cost
+        return true if budget_type == :per_call
+        total - last_event.cost.total_cost < budget
       end
       def raise_on_exceeded?(config)

data/lib/llm_cost_tracker/configuration.rb CHANGED Viewed

@@ -19,6 +19,8 @@ module LlmCostTracker
       custom_storage
       on_budget_exceeded
       monthly_budget
+      daily_budget
+      per_call_budget
       log_level
       prices_file
     ].freeze
@@ -48,6 +50,8 @@ module LlmCostTracker
       @default_tags       = {}
       @on_budget_exceeded = nil
       @monthly_budget     = nil
+      @daily_budget       = nil
+      @per_call_budget    = nil
       self.budget_exceeded_behavior = :notify
       self.storage_error_behavior = :warn
       self.unknown_pricing_behavior = :warn

data/lib/llm_cost_tracker/cost.rb CHANGED Viewed

@@ -3,9 +3,8 @@
 module LlmCostTracker
   Cost = Data.define(
     :input_cost,
-    :cached_input_cost,
     :cache_read_input_cost,
-    :cache_creation_input_cost,
+    :cache_write_input_cost,
     :output_cost,
     :total_cost,
     :currency

data/lib/llm_cost_tracker/errors.rb CHANGED Viewed

@@ -6,14 +6,33 @@ module LlmCostTracker
   class InvalidFilterError < Error; end
   class BudgetExceededError < Error
-    attr_reader :monthly_total, :budget, :last_event
+    attr_reader :monthly_total, :daily_total, :call_cost, :total, :budget, :budget_type, :last_event
-    def initialize(monthly_total:, budget:, last_event: nil)
+    def initialize(budget:, last_event: nil, budget_type: nil, total: nil, monthly_total: nil, daily_total: nil,
+                   call_cost: nil)
       @monthly_total = monthly_total
+      @daily_total = daily_total
+      @call_cost = call_cost
+      @total = total || monthly_total || daily_total || call_cost
       @budget = budget
+      @budget_type = budget_type || inferred_budget_type
       @last_event = last_event
-      super("LLM monthly budget exceeded: $#{format('%.6f', monthly_total)} / $#{format('%.6f', budget)}")
+      super("LLM #{budget_label} budget exceeded: $#{format('%.6f', @total)} / $#{format('%.6f', budget)}")
+    end
+    private
+    def inferred_budget_type
+      return :monthly if monthly_total
+      return :daily if daily_total
+      return :per_call if call_cost
+      :unknown
+    end
+    def budget_label
+      budget_type.to_s.tr("_", "-")
     end
   end

data/lib/llm_cost_tracker/event.rb CHANGED Viewed

@@ -7,6 +7,10 @@ module LlmCostTracker
     :input_tokens,
     :output_tokens,
     :total_tokens,
+    :cache_read_input_tokens,
+    :cache_write_input_tokens,
+    :hidden_output_tokens,
+    :pricing_mode,
     :cost,
     :tags,
     :latency_ms,